https://github.com/Fznamznon updated https://github.com/llvm/llvm-project/pull/97274
>From 4d5008fcf3ac37fa213c8f2cf42c3cce6369c83d Mon Sep 17 00:00:00 2001 From: "Podchishchaeva, Mariya" <mariya.podchishcha...@intel.com> Date: Thu, 20 Jun 2024 06:04:07 -0700 Subject: [PATCH 1/2] [clang] Inject tokens containing #embed back into token stream Instead of playing "whack a mole" with places where #embed should be expanded as comma-separated list, just inject each byte as a token back into the stream, separated by commas. --- clang/include/clang/Basic/TokenKinds.def | 3 ++ clang/include/clang/Basic/TokenKinds.h | 2 +- clang/include/clang/Lex/Preprocessor.h | 5 +- clang/include/clang/Parse/Parser.h | 3 +- clang/lib/Parse/ParseExpr.cpp | 53 ++++++++++----------- clang/lib/Parse/ParseTemplate.cpp | 41 +++++----------- clang/lib/Sema/SemaExpr.cpp | 6 ++- clang/test/Preprocessor/embed_codegen.cpp | 3 +- clang/test/Preprocessor/embed_constexpr.cpp | 3 +- clang/test/Preprocessor/embed_weird.cpp | 21 ++++---- 10 files changed, 63 insertions(+), 77 deletions(-) diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 37d570ca5e75b..1bc9c59576f33 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -165,6 +165,9 @@ TOK(raw_identifier) // Used only in raw lexing mode. // C99 6.4.4.2: Floating Constants TOK(numeric_constant) // 0x123 +// Directly holds numerical value. Used to process C23 #embed. +TOK(binary_data) + // C99 6.4.4: Character Constants TOK(char_constant) // 'a' TOK(wide_char_constant) // L'b' diff --git a/clang/include/clang/Basic/TokenKinds.h b/clang/include/clang/Basic/TokenKinds.h index e5183a27d2bc5..1b133dde89587 100644 --- a/clang/include/clang/Basic/TokenKinds.h +++ b/clang/include/clang/Basic/TokenKinds.h @@ -98,7 +98,7 @@ inline bool isLiteral(TokenKind K) { return K == tok::numeric_constant || K == tok::char_constant || K == tok::wide_char_constant || K == tok::utf8_char_constant || K == tok::utf16_char_constant || K == tok::utf32_char_constant || - isStringLiteral(K) || K == tok::header_name; + isStringLiteral(K) || K == tok::header_name || K == tok::binary_data; } /// Return true if this is any of tok::annot_* kinds. diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index be3334b980746..8e30756da2a01 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2123,8 +2123,9 @@ class Preprocessor { char getSpellingOfSingleCharacterNumericConstant(const Token &Tok, bool *Invalid = nullptr) const { - assert(Tok.is(tok::numeric_constant) && - Tok.getLength() == 1 && "Called on unsupported token"); + assert((Tok.is(tok::numeric_constant) || Tok.is(tok::binary_data)) && + Tok.getLength() == 1 && + "Called on unsupported token"); assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1"); // If the token is carrying a literal data pointer, just use it. diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 6880fa4bb0b03..7bc2280764c5b 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -2123,7 +2123,7 @@ class Parser : public CodeCompletionHandler { }; ExprResult ParseInitializerWithPotentialDesignator(DesignatorCompletionInfo); ExprResult createEmbedExpr(); - void ExpandEmbedDirective(SmallVectorImpl<Expr *> &Exprs); + void injectEmbedTokens(); //===--------------------------------------------------------------------===// // clang Expressions @@ -3830,7 +3830,6 @@ class Parser : public CodeCompletionHandler { AnnotateTemplateIdTokenAsType(CXXScopeSpec &SS, ImplicitTypenameContext AllowImplicitTypename, bool IsClassName = false); - void ExpandEmbedIntoTemplateArgList(TemplateArgList &TemplateArgs); bool ParseTemplateArgumentList(TemplateArgList &TemplateArgs, TemplateTy Template, SourceLocation OpenLoc); ParsedTemplateArgument ParseTemplateTemplateArgument(); diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 9fc3cd73f73a0..a3b800a35b55e 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -1018,6 +1018,7 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind, // primary-expression case tok::numeric_constant: + case tok::binary_data: // constant: integer-constant // constant: floating-constant @@ -1067,18 +1068,9 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind, } case tok::annot_embed: { - // We've met #embed in a context where a single value is expected. Take last - // element from #embed data as if it were a comma expression. - EmbedAnnotationData *Data = - reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue()); - SourceLocation StartLoc = ConsumeAnnotationToken(); - ASTContext &Context = Actions.getASTContext(); - Res = IntegerLiteral::Create(Context, - llvm::APInt(CHAR_BIT, Data->BinaryData.back()), - Context.UnsignedCharTy, StartLoc); - if (Data->BinaryData.size() > 1) - Diag(StartLoc, diag::warn_unused_comma_left_operand); - break; + injectEmbedTokens(); + return ParseCastExpression(ParseKind, isAddressOfOperand, isTypeCast, + isVectorLiteral, NotPrimaryExpression); } case tok::kw___super: @@ -3578,15 +3570,29 @@ ExprResult Parser::ParseFoldExpression(ExprResult LHS, T.getCloseLocation()); } -void Parser::ExpandEmbedDirective(SmallVectorImpl<Expr *> &Exprs) { +void Parser::injectEmbedTokens() { EmbedAnnotationData *Data = reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue()); - SourceLocation StartLoc = ConsumeAnnotationToken(); - ASTContext &Context = Actions.getASTContext(); - for (auto Byte : Data->BinaryData) { - Exprs.push_back(IntegerLiteral::Create(Context, llvm::APInt(CHAR_BIT, Byte), - Context.UnsignedCharTy, StartLoc)); + MutableArrayRef<Token> Toks( + PP.getPreprocessorAllocator().Allocate<Token>(Data->BinaryData.size() * 2 - 1), + Data->BinaryData.size() * 2 - 1); + unsigned I = 0; + for (auto &Byte : Data->BinaryData) { + Toks[I].startToken(); + Toks[I].setKind(tok::binary_data); + Toks[I].setLocation(Tok.getLocation()); + Toks[I].setLength(1); + Toks[I].setLiteralData(&Byte); + if (I != ((Data->BinaryData.size() - 1) * 2)) { + Toks[I + 1].startToken(); + Toks[I + 1].setKind(tok::comma); + Toks[I + 1].setLocation(Tok.getLocation()); + } + I += 2; } + PP.EnterTokenStream(std::move(Toks), /*DisableMacroExpansion=*/true, + /*IsReinject=*/false); + ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true); } /// ParseExpressionList - Used for C/C++ (argument-)expression-list. @@ -3624,17 +3630,8 @@ bool Parser::ParseExpressionList(SmallVectorImpl<Expr *> &Exprs, if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) { Diag(Tok, diag::warn_cxx98_compat_generalized_initializer_lists); Expr = ParseBraceInitializer(); - } else if (Tok.is(tok::annot_embed)) { - ExpandEmbedDirective(Exprs); - if (Tok.isNot(tok::comma)) - break; - Token Comma = Tok; - ConsumeToken(); - checkPotentialAngleBracketDelimiter(Comma); - continue; - } else { + } else Expr = ParseAssignmentExpression(); - } if (EarlyTypoCorrection) Expr = Actions.CorrectDelayedTyposInExpr(Expr); diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp index 7e30afa2c64a4..a5130f56600e5 100644 --- a/clang/lib/Parse/ParseTemplate.cpp +++ b/clang/lib/Parse/ParseTemplate.cpp @@ -1523,19 +1523,6 @@ ParsedTemplateArgument Parser::ParseTemplateArgument() { ExprArg.get(), Loc); } -void Parser::ExpandEmbedIntoTemplateArgList(TemplateArgList &TemplateArgs) { - EmbedAnnotationData *Data = - reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue()); - SourceLocation StartLoc = ConsumeAnnotationToken(); - ASTContext &Context = Actions.getASTContext(); - for (auto Byte : Data->BinaryData) { - Expr *E = IntegerLiteral::Create(Context, llvm::APInt(CHAR_BIT, Byte), - Context.UnsignedCharTy, StartLoc); - TemplateArgs.push_back( - ParsedTemplateArgument(ParsedTemplateArgument::NonType, E, StartLoc)); - } -} - /// ParseTemplateArgumentList - Parse a C++ template-argument-list /// (C++ [temp.names]). Returns true if there was an error. /// @@ -1560,24 +1547,20 @@ bool Parser::ParseTemplateArgumentList(TemplateArgList &TemplateArgs, do { PreferredType.enterFunctionArgument(Tok.getLocation(), RunSignatureHelp); - if (Tok.is(tok::annot_embed)) { - ExpandEmbedIntoTemplateArgList(TemplateArgs); - } else { - ParsedTemplateArgument Arg = ParseTemplateArgument(); - SourceLocation EllipsisLoc; - if (TryConsumeToken(tok::ellipsis, EllipsisLoc)) - Arg = Actions.ActOnPackExpansion(Arg, EllipsisLoc); - - if (Arg.isInvalid()) { - if (PP.isCodeCompletionReached() && !CalledSignatureHelp) - RunSignatureHelp(); - return true; - } - - // Save this template argument. - TemplateArgs.push_back(Arg); + ParsedTemplateArgument Arg = ParseTemplateArgument(); + SourceLocation EllipsisLoc; + if (TryConsumeToken(tok::ellipsis, EllipsisLoc)) + Arg = Actions.ActOnPackExpansion(Arg, EllipsisLoc); + + if (Arg.isInvalid()) { + if (PP.isCodeCompletionReached() && !CalledSignatureHelp) + RunSignatureHelp(); + return true; } + // Save this template argument. + TemplateArgs.push_back(Arg); + // If the next token is a comma, consume it and keep reading // arguments. } while (TryConsumeToken(tok::comma)); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index db44cfe1288b6..c5657b2389cd2 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -3722,9 +3722,11 @@ bool Sema::CheckLoopHintExpr(Expr *E, SourceLocation Loc, bool AllowZero) { ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) { // Fast path for a single digit (which is quite common). A single digit // cannot have a trigraph, escaped newline, radix prefix, or suffix. - if (Tok.getLength() == 1) { + if (Tok.getLength() == 1 || Tok.getKind() == tok::binary_data) { const char Val = PP.getSpellingOfSingleCharacterNumericConstant(Tok); - return ActOnIntegerConstant(Tok.getLocation(), Val-'0'); + return ActOnIntegerConstant( + Tok.getLocation(), + (Tok.getKind() == tok::binary_data) ? Val : Val - '0'); } SmallString<128> SpellingBuffer; diff --git a/clang/test/Preprocessor/embed_codegen.cpp b/clang/test/Preprocessor/embed_codegen.cpp index 64110afc162d7..201bf300bc669 100644 --- a/clang/test/Preprocessor/embed_codegen.cpp +++ b/clang/test/Preprocessor/embed_codegen.cpp @@ -43,8 +43,9 @@ a }; // CHECK: store i32 107, ptr %b, align 4 -int b = +int b = ( #embed<jk.txt> + ) ; diff --git a/clang/test/Preprocessor/embed_constexpr.cpp b/clang/test/Preprocessor/embed_constexpr.cpp index 1cadff76b4890..a7857641a2e8d 100644 --- a/clang/test/Preprocessor/embed_constexpr.cpp +++ b/clang/test/Preprocessor/embed_constexpr.cpp @@ -1,5 +1,6 @@ // RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -Wno-c23-extensions // RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -fexperimental-new-constant-interpreter -Wno-c23-extensions +// expected-no-diagnostics constexpr int value(int a, int b) { return a + b; @@ -46,7 +47,7 @@ int array[ static_assert(sizeof(array) / sizeof(int) == 'j'); constexpr int comma_expr = ( -#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}} +#embed <jk.txt> ); static_assert(comma_expr == 'k'); diff --git a/clang/test/Preprocessor/embed_weird.cpp b/clang/test/Preprocessor/embed_weird.cpp index 31b622c848d6a..cc73a88e5a657 100644 --- a/clang/test/Preprocessor/embed_weird.cpp +++ b/clang/test/Preprocessor/embed_weird.cpp @@ -27,7 +27,7 @@ _Static_assert( _Static_assert(sizeof( #embed <single_byte.txt> ) == -sizeof(unsigned char) +sizeof(int) , "" ); _Static_assert(sizeof @@ -35,9 +35,9 @@ _Static_assert(sizeof , "" ); _Static_assert(sizeof( -#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}} +#embed <jk.txt> ) == -sizeof(unsigned char) +sizeof(int) , "" ); @@ -73,10 +73,10 @@ void do_stuff() { // Ensure that we don't accidentally allow you to initialize an unsigned char * // from embedded data; the data is modeled as a string literal internally, but // is not actually a string literal. -const unsigned char *ptr = +const unsigned char *ptr = ( #embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}} -; // c-error@-2 {{incompatible integer to pointer conversion initializing 'const unsigned char *' with an expression of type 'unsigned char'}} \ - cxx-error@-2 {{cannot initialize a variable of type 'const unsigned char *' with an rvalue of type 'unsigned char'}} + ); // c-error@-2 {{incompatible integer to pointer conversion initializing 'const unsigned char *' with an expression of type 'int'}} \ + cxx-error@-2 {{cannot initialize a variable of type 'const unsigned char *' with an rvalue of type 'int'}} // However, there are some cases where this is fine and should work. const unsigned char *null_ptr_1 = @@ -101,11 +101,10 @@ constexpr unsigned char ch = ; static_assert(ch == 0); -void foobar(float x, char y, char z); // cxx-note {{candidate function not viable: requires 3 arguments, but 1 was provided}} - // c-note@-1 {{declared here}} -void g1() { foobar((float) // cxx-error {{no matching function for call to 'foobar'}} -#embed "numbers.txt" limit(3) // expected-warning {{left operand of comma operator has no effect}} -); // c-error {{too few arguments to function call, expected 3, have 1}} +void foobar(float x, char y, char z); +void g1() { foobar((float) +#embed "numbers.txt" limit(3) +); } #if __cplusplus >From f34179d9f319add9144ac521f5e16448a4390844 Mon Sep 17 00:00:00 2001 From: "Podchishchaeva, Mariya" <mariya.podchishcha...@intel.com> Date: Mon, 8 Jul 2024 00:47:17 -0700 Subject: [PATCH 2/2] Fix format --- clang/include/clang/Lex/Preprocessor.h | 3 +-- clang/lib/Parse/ParseExpr.cpp | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 8e30756da2a01..591c1fccf9a8f 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2124,8 +2124,7 @@ class Preprocessor { getSpellingOfSingleCharacterNumericConstant(const Token &Tok, bool *Invalid = nullptr) const { assert((Tok.is(tok::numeric_constant) || Tok.is(tok::binary_data)) && - Tok.getLength() == 1 && - "Called on unsupported token"); + Tok.getLength() == 1 && "Called on unsupported token"); assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1"); // If the token is carrying a literal data pointer, just use it. diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 2559ddf268964..fa24cf13b12cc 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -3574,9 +3574,9 @@ ExprResult Parser::ParseFoldExpression(ExprResult LHS, void Parser::injectEmbedTokens() { EmbedAnnotationData *Data = reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue()); - MutableArrayRef<Token> Toks( - PP.getPreprocessorAllocator().Allocate<Token>(Data->BinaryData.size() * 2 - 1), - Data->BinaryData.size() * 2 - 1); + MutableArrayRef<Token> Toks(PP.getPreprocessorAllocator().Allocate<Token>( + Data->BinaryData.size() * 2 - 1), + Data->BinaryData.size() * 2 - 1); unsigned I = 0; for (auto &Byte : Data->BinaryData) { Toks[I].startToken(); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits