hokein updated this revision to Diff 415387. hokein marked 4 inline comments as done. hokein added a comment.
address comments Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D121678/new/ https://reviews.llvm.org/D121678 Files: clang/include/clang/Tooling/Syntax/Pseudo/Token.h clang/lib/Tooling/Syntax/Pseudo/Lex.cpp clang/lib/Tooling/Syntax/Pseudo/Token.cpp clang/lib/Tooling/Syntax/Pseudo/cxx.bnf clang/unittests/Tooling/Syntax/Pseudo/TokenTest.cpp
Index: clang/unittests/Tooling/Syntax/Pseudo/TokenTest.cpp =================================================================== --- clang/unittests/Tooling/Syntax/Pseudo/TokenTest.cpp +++ clang/unittests/Tooling/Syntax/Pseudo/TokenTest.cpp @@ -172,6 +172,25 @@ })); } +TEST(TokenTest, SplitGreaterGreater) { + LangOptions Opts; + std::string Code = R"cpp( +>> // split +// >> with an escaped newline in the middle, split +>\ +> +>>= // not split +)cpp"; + TokenStream Split = stripComments(cook(lex(Code, Opts), Opts)); + EXPECT_THAT(Split.tokens(), ElementsAreArray({ + token(">", tok::greater), + token(">", tok::greater), + token(">", tok::greater), + token(">", tok::greater), + token(">>=", tok::greatergreaterequal), + })); +} + TEST(TokenTest, DropComments) { LangOptions Opts; std::string Code = R"cpp( Index: clang/lib/Tooling/Syntax/Pseudo/cxx.bnf =================================================================== --- clang/lib/Tooling/Syntax/Pseudo/cxx.bnf +++ clang/lib/Tooling/Syntax/Pseudo/cxx.bnf @@ -13,6 +13,9 @@ # - the file merely describes the core C++ grammar. Preprocessor directives and # lexical conversions are omitted as we reuse clang's lexer and run a fake # preprocessor; +# - grammar rules with the >> token are adjusted: the greatergreater token is +# split into two > tokens, to make the GLR parser aware of both nested +# templates and the right shift operator. 
# # Guidelines: # - non-terminals are lower_case; terminals (aka tokens) correspond to @@ -96,7 +99,7 @@ fold-operator := ^ fold-operator := | fold-operator := << -fold-operator := >> +fold-operator := greatergreater fold-operator := += fold-operator := -= fold-operator := *= @@ -202,7 +205,7 @@ # expr.shift shift-expression := additive-expression shift-expression := shift-expression << additive-expression -shift-expression := shift-expression >> additive-expression +shift-expression := shift-expression greatergreater additive-expression # expr.spaceship compare-expression := shift-expression compare-expression := compare-expression <=> shift-expression @@ -615,7 +618,7 @@ operator-name := ^^ operator-name := || operator-name := << -operator-name := >> +operator-name := greatergreater operator-name := <<= operator-name := >>= operator-name := ++ @@ -737,3 +740,8 @@ module-keyword := IDENTIFIER import-keyword := IDENTIFIER export-keyword := IDENTIFIER + +#! greatergreater token -- the clang lexer always lexes it as a single token; we +#! split it into two tokens to make the GLR parser aware of the nested-template +#! case. 
+greatergreater := > > Index: clang/lib/Tooling/Syntax/Pseudo/Token.cpp =================================================================== --- clang/lib/Tooling/Syntax/Pseudo/Token.cpp +++ clang/lib/Tooling/Syntax/Pseudo/Token.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/Tooling/Syntax/Pseudo/Token.h" +#include "clang/Basic/TokenKinds.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" Index: clang/lib/Tooling/Syntax/Pseudo/Lex.cpp =================================================================== --- clang/lib/Tooling/Syntax/Pseudo/Lex.cpp +++ clang/lib/Tooling/Syntax/Pseudo/Lex.cpp @@ -99,10 +99,21 @@ Tok.Length = Text.size(); Tok.Flags &= ~static_cast<decltype(Tok.Flags)>(LexFlags::NeedsCleaning); } - // Cook raw_identifiers into identifier, keyword, etc. - if (Tok.Kind == tok::raw_identifier) + + if (Tok.Kind == tok::raw_identifier) { + // Cook raw_identifiers into identifier, keyword, etc. Tok.Kind = Identifiers.get(Tok.text()).getTokenID(); - Result.push(std::move(Tok)); + } else if (Tok.Kind == tok::greatergreater) { + // Split the greatergreater token. + // FIXME: split lessless token to support Cuda triple angle brackets <<<. + assert(Tok.text() == ">>"); + Tok.Kind = tok::greater; + Tok.Length = 1; + Result.push(Tok); + // Line is wrong if the first greater is followed by an escaped newline! + Tok.Data = Tok.text().data() + 1; + } + Result.push((Tok)); } Result.finalize(); Index: clang/include/clang/Tooling/Syntax/Pseudo/Token.h =================================================================== --- clang/include/clang/Tooling/Syntax/Pseudo/Token.h +++ clang/include/clang/Tooling/Syntax/Pseudo/Token.h @@ -181,7 +181,8 @@ NeedsCleaning = 1 << 1, }; -/// Derives a token stream by decoding escapes and interpreting raw_identifiers. 
+/// Derives a token stream by decoding escapes, interpreting raw_identifiers and +/// splitting the greatergreater token. /// /// Tokens containing UCNs, escaped newlines, trigraphs etc are decoded and /// their backing data is owned by the returned stream.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits