Author: rsmith Date: Thu Apr 11 14:18:22 2019 New Revision: 358230 URL: http://llvm.org/viewvc/llvm-project?rev=358230&view=rev Log: Remove use of lookahead from _Pragma handling and from all other internal lexing steps in the preprocessor.
It is not safe to use the preprocessor's token lookahead except when operating on the final sequence of tokens that would be produced by phase 4 of translation. Doing so corrupts the token lookahead cache used by the parser. (See added testcase for an example.) Lookahead should instead be viewed as a layer on top of the normal lexer. Added assertions to catch any further incorrect uses of lookahead within lexing actions. Added: cfe/trunk/test/Preprocessor/_Pragma-in-macro-arg.cpp Modified: cfe/trunk/include/clang/Lex/Preprocessor.h cfe/trunk/lib/Lex/PPCaching.cpp cfe/trunk/lib/Lex/PPDirectives.cpp cfe/trunk/lib/Lex/Pragma.cpp cfe/trunk/lib/Lex/Preprocessor.cpp Modified: cfe/trunk/include/clang/Lex/Preprocessor.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/Preprocessor.h?rev=358230&r1=358229&r2=358230&view=diff ============================================================================== --- cfe/trunk/include/clang/Lex/Preprocessor.h (original) +++ cfe/trunk/include/clang/Lex/Preprocessor.h Thu Apr 11 14:18:22 2019 @@ -323,6 +323,14 @@ class Preprocessor { /// to avoid hitting the same error over and over again. bool HasReachedMaxIncludeDepth = false; + /// The number of currently-active calls to Lex. + /// + /// Lex is reentrant, and asking for an (end-of-phase-4) token can often + /// require asking for multiple additional tokens. This counter makes it + /// possible for Lex to detect whether it's producing a token for the end + /// of phase 4 of translation or for some other situation. + unsigned LexLevel = 0; + public: struct PreambleSkipInfo { SourceLocation HashTokenLoc; @@ -1244,24 +1252,6 @@ public: /// Disable the last EnableBacktrackAtThisPos call. void CommitBacktrackedTokens(); - struct CachedTokensRange { - CachedTokensTy::size_type Begin, End; - }; - -private: - /// A range of cached tokens that should be erased after lexing - /// when backtracking requires the erasure of such cached tokens. - Optional<CachedTokensRange> CachedTokenRangeToErase; - -public: - /// Returns the range of cached tokens that were lexed since - /// EnableBacktrackAtThisPos() was previously called. - CachedTokensRange LastCachedTokenRange(); - - /// Erase the range of cached tokens that were lexed since - /// EnableBacktrackAtThisPos() was previously called. - void EraseCachedTokens(CachedTokensRange TokenRange); - /// Make Preprocessor re-lex the tokens that were lexed since /// EnableBacktrackAtThisPos() was previously called. void Backtrack(); @@ -1353,6 +1343,7 @@ public: /// tokens after phase 5. As such, it is equivalent to using /// 'Lex', not 'LexUnexpandedToken'. const Token &LookAhead(unsigned N) { + assert(LexLevel == 0 && "cannot use lookahead while lexing"); if (CachedLexPos + N < CachedTokens.size()) return CachedTokens[CachedLexPos+N]; else @@ -1379,8 +1370,16 @@ public: /// If BackTrack() is called afterwards, the token will remain at the /// insertion point. void EnterToken(const Token &Tok) { - EnterCachingLexMode(); - CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok); + if (LexLevel) { + // It's not correct in general to enter caching lex mode while in the + // middle of a nested lexing action. + auto TokCopy = llvm::make_unique<Token[]>(1); + TokCopy[0] = Tok; + EnterTokenStream(std::move(TokCopy), 1, true); + } else { + EnterCachingLexMode(); + CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok); + } } /// We notify the Preprocessor that if it is caching tokens (because @@ -2062,6 +2061,7 @@ private: } void EnterCachingLexMode(); + void EnterCachingLexModeUnchecked(); void ExitCachingLexMode() { if (InCachingLexMode()) Modified: cfe/trunk/lib/Lex/PPCaching.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/PPCaching.cpp?rev=358230&r1=358229&r2=358230&view=diff ============================================================================== --- cfe/trunk/lib/Lex/PPCaching.cpp (original) +++ cfe/trunk/lib/Lex/PPCaching.cpp Thu Apr 11 14:18:22 2019 @@ -23,6 +23,7 @@ using namespace clang; // be called multiple times and CommitBacktrackedTokens/Backtrack calls will // be combined with the EnableBacktrackAtThisPos calls in reverse order. void Preprocessor::EnableBacktrackAtThisPos() { + assert(LexLevel == 0 && "cannot use lookahead while lexing"); BacktrackPositions.push_back(CachedLexPos); EnterCachingLexMode(); } @@ -34,29 +35,6 @@ void Preprocessor::CommitBacktrackedToke BacktrackPositions.pop_back(); } -Preprocessor::CachedTokensRange Preprocessor::LastCachedTokenRange() { - assert(isBacktrackEnabled()); - auto PrevCachedLexPos = BacktrackPositions.back(); - return CachedTokensRange{PrevCachedLexPos, CachedLexPos}; -} - -void Preprocessor::EraseCachedTokens(CachedTokensRange TokenRange) { - assert(TokenRange.Begin <= TokenRange.End); - if (CachedLexPos == TokenRange.Begin && TokenRange.Begin != TokenRange.End) { - // We have backtracked to the start of the token range as we want to consume - // them again. Erase the tokens only after consuming then. - assert(!CachedTokenRangeToErase); - CachedTokenRangeToErase = TokenRange; - return; - } - // The cached tokens were committed, so they should be erased now. - assert(TokenRange.End == CachedLexPos); - CachedTokens.erase(CachedTokens.begin() + TokenRange.Begin, - CachedTokens.begin() + TokenRange.End); - CachedLexPos = TokenRange.Begin; - ExitCachingLexMode(); -} - // Make Preprocessor re-lex the tokens that were lexed since // EnableBacktrackAtThisPos() was previously called. void Preprocessor::Backtrack() { @@ -71,15 +49,12 @@ void Preprocessor::CachingLex(Token &Res if (!InCachingLexMode()) return; + // The assert in EnterCachingLexMode should prevent this from happening. + assert(LexLevel == 1 && + "should not use token caching within the preprocessor"); + if (CachedLexPos < CachedTokens.size()) { Result = CachedTokens[CachedLexPos++]; - // Erase the some of the cached tokens after they are consumed when - // asked to do so. - if (CachedTokenRangeToErase && - CachedTokenRangeToErase->End == CachedLexPos) { - EraseCachedTokens(*CachedTokenRangeToErase); - CachedTokenRangeToErase = None; - } return; } @@ -88,14 +63,14 @@ void Preprocessor::CachingLex(Token &Res if (isBacktrackEnabled()) { // Cache the lexed token. - EnterCachingLexMode(); + EnterCachingLexModeUnchecked(); CachedTokens.push_back(Result); ++CachedLexPos; return; } if (CachedLexPos < CachedTokens.size()) { - EnterCachingLexMode(); + EnterCachingLexModeUnchecked(); } else { // All cached tokens were consumed. CachedTokens.clear(); @@ -104,11 +79,23 @@ void Preprocessor::CachingLex(Token &Res } void Preprocessor::EnterCachingLexMode() { + // The caching layer sits on top of all the other lexers, so it's incorrect + // to cache tokens while inside a nested lex action. The cached tokens would + // be retained after returning to the enclosing lex action and, at best, + // would appear at the wrong position in the token stream. + assert(LexLevel == 0 && + "entered caching lex mode while lexing something else"); + if (InCachingLexMode()) { assert(CurLexerKind == CLK_CachingLexer && "Unexpected lexer kind"); return; } + EnterCachingLexModeUnchecked(); +} + +void Preprocessor::EnterCachingLexModeUnchecked() { + assert(CurLexerKind != CLK_CachingLexer && "already in caching lex mode"); PushIncludeMacroStack(); CurLexerKind = CLK_CachingLexer; } Modified: cfe/trunk/lib/Lex/PPDirectives.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/PPDirectives.cpp?rev=358230&r1=358229&r2=358230&view=diff ============================================================================== --- cfe/trunk/lib/Lex/PPDirectives.cpp (original) +++ cfe/trunk/lib/Lex/PPDirectives.cpp Thu Apr 11 14:18:22 2019 @@ -829,10 +829,10 @@ void Preprocessor::HandleSkippedDirectiv return HandleIncludeDirective(HashLoc, Result); } if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) { - Token P = LookAhead(0); - auto *II = P.getIdentifierInfo(); + Lex(Result); + auto *II = Result.getIdentifierInfo(); if (II && II->getName() == "hdrstop") - return HandlePragmaDirective(HashLoc, PIK_HashPragma); + return HandlePragmaHdrstop(Result); } } DiscardUntilEndOfDirective(); Modified: cfe/trunk/lib/Lex/Pragma.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Pragma.cpp?rev=358230&r1=358229&r2=358230&view=diff ============================================================================== --- cfe/trunk/lib/Lex/Pragma.cpp (original) +++ cfe/trunk/lib/Lex/Pragma.cpp Thu Apr 11 14:18:22 2019 @@ -144,84 +144,72 @@ void Preprocessor::HandlePragmaDirective DiscardUntilEndOfDirective(); } -namespace { - -/// Helper class for \see Preprocessor::Handle_Pragma. -class LexingFor_PragmaRAII { - Preprocessor &PP; - bool InMacroArgPreExpansion; - bool Failed = false; - Token &OutTok; - Token PragmaTok; - -public: - LexingFor_PragmaRAII(Preprocessor &PP, bool InMacroArgPreExpansion, - Token &Tok) - : PP(PP), InMacroArgPreExpansion(InMacroArgPreExpansion), OutTok(Tok) { - if (InMacroArgPreExpansion) { - PragmaTok = OutTok; - PP.EnableBacktrackAtThisPos(); - } - } - - ~LexingFor_PragmaRAII() { - if (InMacroArgPreExpansion) { - // When committing/backtracking the cached pragma tokens in a macro - // argument pre-expansion we want to ensure that either the tokens which - // have been committed will be removed from the cache or that the tokens - // over which we just backtracked won't remain in the cache after they're - // consumed and that the caching will stop after consuming them. - // Otherwise the caching will interfere with the way macro expansion - // works, because we will continue to cache tokens after consuming the - // backtracked tokens, which shouldn't happen when we're dealing with - // macro argument pre-expansion. - auto CachedTokenRange = PP.LastCachedTokenRange(); - if (Failed) { - PP.CommitBacktrackedTokens(); - } else { - PP.Backtrack(); - OutTok = PragmaTok; - } - PP.EraseCachedTokens(CachedTokenRange); - } - } - - void failed() { - Failed = true; - } -}; - -} // namespace - /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then /// return the first token after the directive. The _Pragma token has just /// been read into 'Tok'. void Preprocessor::Handle_Pragma(Token &Tok) { - // This works differently if we are pre-expanding a macro argument. - // In that case we don't actually "activate" the pragma now, we only lex it - // until we are sure it is lexically correct and then we backtrack so that - // we activate the pragma whenever we encounter the tokens again in the token - // stream. This ensures that we will activate it in the correct location - // or that we will ignore it if it never enters the token stream, e.g: + // C11 6.10.3.4/3: + // all pragma unary operator expressions within [a completely + // macro-replaced preprocessing token sequence] are [...] processed [after + // rescanning is complete] + // + // This means that we execute _Pragma operators in two cases: + // + // 1) on token sequences that would otherwise be produced as the output of + // phase 4 of preprocessing, and + // 2) on token sequences formed as the macro-replaced token sequence of a + // macro argument // - // #define EMPTY(x) - // #define INACTIVE(x) EMPTY(x) - // INACTIVE(_Pragma("clang diagnostic ignored \"-Wconversion\"")) + // Case #2 appears to be a wording bug: only _Pragmas that would survive to + // the end of phase 4 should actually be executed. Discussion on the WG14 + // mailing list suggests that a _Pragma operator is notionally checked early, + // but only pragmas that survive to the end of phase 4 should be executed. + // + // In Case #2, we check the syntax now, but then put the tokens back into the + // token stream for later consumption. + + struct TokenCollector { + Preprocessor &Self; + bool Collect; + SmallVector<Token, 3> Tokens; + Token &Tok; + + void lex() { + if (Collect) + Tokens.push_back(Tok); + Self.Lex(Tok); + } + + void revert() { + assert(Collect && "did not collect tokens"); + assert(!Tokens.empty() && "collected unexpected number of tokens"); + + // Push the ( "string" ) tokens into the token stream. + auto Toks = llvm::make_unique<Token[]>(Tokens.size()); + std::copy(Tokens.begin() + 1, Tokens.end(), Toks.get()); + Toks[Tokens.size() - 1] = Tok; + Self.EnterTokenStream(std::move(Toks), Tokens.size(), + /*DisableMacroExpansion*/ true); + + // ... and return the _Pragma token unchanged. + Tok = *Tokens.begin(); + } + }; - LexingFor_PragmaRAII _PragmaLexing(*this, InMacroArgPreExpansion, Tok); + TokenCollector Toks = {*this, InMacroArgPreExpansion, {}, Tok}; // Remember the pragma token location. SourceLocation PragmaLoc = Tok.getLocation(); // Read the '('. - Lex(Tok); + Toks.lex(); if (Tok.isNot(tok::l_paren)) { Diag(PragmaLoc, diag::err__Pragma_malformed); - return _PragmaLexing.failed(); + return; } // Read the '"..."'. - Lex(Tok); + Toks.lex(); if (!tok::isStringLiteral(Tok.getKind())) { Diag(PragmaLoc, diag::err__Pragma_malformed); // Skip bad tokens, and the ')', if present. @@ -233,7 +221,7 @@ void Preprocessor::Handle_Pragma(Token & Lex(Tok); if (Tok.is(tok::r_paren)) Lex(Tok); - return _PragmaLexing.failed(); + return; } if (Tok.hasUDSuffix()) { @@ -242,21 +230,24 @@ void Preprocessor::Handle_Pragma(Token & Lex(Tok); if (Tok.is(tok::r_paren)) Lex(Tok); - return _PragmaLexing.failed(); + return; } // Remember the string. Token StrTok = Tok; // Read the ')'. - Lex(Tok); + Toks.lex(); if (Tok.isNot(tok::r_paren)) { Diag(PragmaLoc, diag::err__Pragma_malformed); - return _PragmaLexing.failed(); + return; } - if (InMacroArgPreExpansion) + // If we're expanding a macro argument, put the tokens back. + if (InMacroArgPreExpansion) { + Toks.revert(); return; + } SourceLocation RParenLoc = Tok.getLocation(); std::string StrVal = getSpelling(StrTok); Modified: cfe/trunk/lib/Lex/Preprocessor.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Preprocessor.cpp?rev=358230&r1=358229&r2=358230&view=diff ============================================================================== --- cfe/trunk/lib/Lex/Preprocessor.cpp (original) +++ cfe/trunk/lib/Lex/Preprocessor.cpp Thu Apr 11 14:18:22 2019 @@ -862,6 +862,8 @@ bool Preprocessor::HandleIdentifier(Toke } void Preprocessor::Lex(Token &Result) { + ++LexLevel; + // We loop here until a lex function returns a token; this avoids recursion. bool ReturnedToken; do { @@ -893,6 +895,7 @@ void Preprocessor::Lex(Token &Result) { } LastTokenWasAt = Result.is(tok::at); + --LexLevel; } /// Lex a header-name token (including one formed from header-name-tokens if Added: cfe/trunk/test/Preprocessor/_Pragma-in-macro-arg.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/_Pragma-in-macro-arg.cpp?rev=358230&view=auto ============================================================================== --- cfe/trunk/test/Preprocessor/_Pragma-in-macro-arg.cpp (added) +++ cfe/trunk/test/Preprocessor/_Pragma-in-macro-arg.cpp Thu Apr 11 14:18:22 2019 @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 %s -verify -Wconversion + +#define P(X) _Pragma(#X) +#define V(X) X + +#define X \ + P(clang diagnostic push) \ + P(clang diagnostic ignored "-Wconversion") \ + ) = 1.2; \ + P(clang diagnostic pop) + +void f() { + int a = 1.2; // expected-warning {{changes value}} + + // Note, we intentionally enter a tentatively-parsed context here to trigger + // regular use of lookahead. This would go wrong if _Pragma checking in macro + // argument pre-expansion also tries to use token lookahead. + int (b + V(X) + + int c = 1.2; // expected-warning {{changes value}} +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits