mprobst created this revision. mprobst added a reviewer: krasimir. Herald added a project: clang. Herald added a subscriber: cfe-commits.
The previous change in https://reviews.llvm.org/D77311 attempted to detect more C++ keywords. However it also precisely detected all JavaScript keywords. That's generally correct, but many JavaScripy keywords, e.g. `get`, are so-called pseudo-keywords. They can be used in positions where a keyword would never be legal, e.g. in a dotted expression: x.type; // type is a pseudo-keyword, but can be used here. x.get; // same for get etc. This change introduces an additional parameter to `IsJavaScriptIdentifier`, allowing clients to toggle whether they want to allow `IdentifierName` tokens, i.e. pseudo-keywords. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D77548 Files: clang/lib/Format/FormatToken.h clang/lib/Format/TokenAnnotator.cpp clang/unittests/Format/FormatTestJS.cpp Index: clang/unittests/Format/FormatTestJS.cpp =================================================================== --- clang/unittests/Format/FormatTestJS.cpp +++ clang/unittests/Format/FormatTestJS.cpp @@ -2412,6 +2412,11 @@ verifyFormat("using!;"); verifyFormat("virtual!;"); verifyFormat("wchar_t!;"); + + // Positive tests: + verifyFormat("x.type!;"); + verifyFormat("x.get!;"); + verifyFormat("x.set!;"); } TEST_F(FormatTestJS, NullPropagatingOperator) { Index: clang/lib/Format/TokenAnnotator.cpp =================================================================== --- clang/lib/Format/TokenAnnotator.cpp +++ clang/lib/Format/TokenAnnotator.cpp @@ -1522,9 +1522,11 @@ if (Style.Language == FormatStyle::LK_JavaScript) { if (Current.is(tok::exclaim)) { if (Current.Previous && - (Keywords.IsJavaScriptIdentifier(*Current.Previous) || - Current.Previous->isOneOf(tok::kw_namespace, tok::r_paren, - tok::r_square, tok::r_brace) || + (Keywords.IsJavaScriptIdentifier( + *Current.Previous, /* AcceptIdentifierName= */ true) || + Current.Previous->isOneOf( + tok::kw_namespace, tok::r_paren, tok::r_square, tok::r_brace, + Keywords.kw_type, Keywords.kw_get, Keywords.kw_set) || Current.Previous->Tok.isLiteral())) { Current.Type = TT_JsNonNullAssertion; return; @@ -3071,7 +3073,9 @@ return false; // In tagged template literals ("html`bar baz`"), there is no space between // the tag identifier and the template string. - if (Keywords.IsJavaScriptIdentifier(Left) && Right.is(TT_TemplateString)) + if (Keywords.IsJavaScriptIdentifier(Left, + /* AcceptIdentifierName= */ false) && + Right.is(TT_TemplateString)) return false; if (Right.is(tok::star) && Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) Index: clang/lib/Format/FormatToken.h =================================================================== --- clang/lib/Format/FormatToken.h +++ clang/lib/Format/FormatToken.h @@ -909,7 +909,11 @@ /// Returns \c true if \p Tok is a true JavaScript identifier, returns /// \c false if it is a keyword or a pseudo keyword. - bool IsJavaScriptIdentifier(const FormatToken &Tok) const { + /// If \c AcceptIdentifierName is true, returns true not only for keywords, + // but also for IdentifierName tokens (aka pseudo-keywords), such as + // ``yield``. + bool IsJavaScriptIdentifier(const FormatToken &Tok, + bool AcceptIdentifierName = true) const { // Based on the list of JavaScript & TypeScript keywords here: // https://github.com/microsoft/TypeScript/blob/master/src/compiler/scanner.ts#L74 switch (Tok.Tok.getKind()) { @@ -946,11 +950,14 @@ case tok::kw_while: // These are JS keywords that are lexed by LLVM/clang as keywords. return false; - case tok::identifier: + case tok::identifier: { // For identifiers, make sure they are true identifiers, excluding the // JavaScript pseudo-keywords (not lexed by LLVM/clang as keywords). - return JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == - JsExtraKeywords.end(); + bool IsPseudoKeyword = + JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) != + JsExtraKeywords.end(); + return AcceptIdentifierName || !IsPseudoKeyword; + } default: // Other keywords are handled in the switch below, to avoid problems due // to duplicate case labels when using the #include trick.
Index: clang/unittests/Format/FormatTestJS.cpp =================================================================== --- clang/unittests/Format/FormatTestJS.cpp +++ clang/unittests/Format/FormatTestJS.cpp @@ -2412,6 +2412,11 @@ verifyFormat("using!;"); verifyFormat("virtual!;"); verifyFormat("wchar_t!;"); + + // Positive tests: + verifyFormat("x.type!;"); + verifyFormat("x.get!;"); + verifyFormat("x.set!;"); } TEST_F(FormatTestJS, NullPropagatingOperator) { Index: clang/lib/Format/TokenAnnotator.cpp =================================================================== --- clang/lib/Format/TokenAnnotator.cpp +++ clang/lib/Format/TokenAnnotator.cpp @@ -1522,9 +1522,11 @@ if (Style.Language == FormatStyle::LK_JavaScript) { if (Current.is(tok::exclaim)) { if (Current.Previous && - (Keywords.IsJavaScriptIdentifier(*Current.Previous) || - Current.Previous->isOneOf(tok::kw_namespace, tok::r_paren, - tok::r_square, tok::r_brace) || + (Keywords.IsJavaScriptIdentifier( + *Current.Previous, /* AcceptIdentifierName= */ true) || + Current.Previous->isOneOf( + tok::kw_namespace, tok::r_paren, tok::r_square, tok::r_brace, + Keywords.kw_type, Keywords.kw_get, Keywords.kw_set) || Current.Previous->Tok.isLiteral())) { Current.Type = TT_JsNonNullAssertion; return; @@ -3071,7 +3073,9 @@ return false; // In tagged template literals ("html`bar baz`"), there is no space between // the tag identifier and the template string. - if (Keywords.IsJavaScriptIdentifier(Left) && Right.is(TT_TemplateString)) + if (Keywords.IsJavaScriptIdentifier(Left, + /* AcceptIdentifierName= */ false) && + Right.is(TT_TemplateString)) return false; if (Right.is(tok::star) && Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) Index: clang/lib/Format/FormatToken.h =================================================================== --- clang/lib/Format/FormatToken.h +++ clang/lib/Format/FormatToken.h @@ -909,7 +909,11 @@ /// Returns \c true if \p Tok is a true JavaScript identifier, returns /// \c false if it is a keyword or a pseudo keyword. - bool IsJavaScriptIdentifier(const FormatToken &Tok) const { + /// If \c AcceptIdentifierName is true, returns true not only for keywords, + // but also for IdentifierName tokens (aka pseudo-keywords), such as + // ``yield``. + bool IsJavaScriptIdentifier(const FormatToken &Tok, + bool AcceptIdentifierName = true) const { // Based on the list of JavaScript & TypeScript keywords here: // https://github.com/microsoft/TypeScript/blob/master/src/compiler/scanner.ts#L74 switch (Tok.Tok.getKind()) { @@ -946,11 +950,14 @@ case tok::kw_while: // These are JS keywords that are lexed by LLVM/clang as keywords. return false; - case tok::identifier: + case tok::identifier: { // For identifiers, make sure they are true identifiers, excluding the // JavaScript pseudo-keywords (not lexed by LLVM/clang as keywords). - return JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == - JsExtraKeywords.end(); + bool IsPseudoKeyword = + JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) != + JsExtraKeywords.end(); + return AcceptIdentifierName || !IsPseudoKeyword; + } default: // Other keywords are handled in the switch below, to avoid problems due // to duplicate case labels when using the #include trick.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits