Author: Yitzhak Mandelbaum Date: 2020-06-19T01:11:29Z New Revision: 9ca50e887db7f903c04a90593d2beed8a96794f1
URL: https://github.com/llvm/llvm-project/commit/9ca50e887db7f903c04a90593d2beed8a96794f1 DIFF: https://github.com/llvm/llvm-project/commit/9ca50e887db7f903c04a90593d2beed8a96794f1.diff LOG: [libTooling] Add parser for string representation of `RangeSelector`. This patch adds a parser for a `RangeSelector` written as a string. The format is closely based on the way one would write the selector in C++. This should enable use of `RangeSelector`s from tools like clang-query and web UIs. Added: clang/include/clang/Tooling/Transformer/Parsing.h clang/lib/Tooling/Transformer/Parsing.cpp Modified: clang/lib/Tooling/Transformer/CMakeLists.txt clang/unittests/Tooling/RangeSelectorTest.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Tooling/Transformer/Parsing.h b/clang/include/clang/Tooling/Transformer/Parsing.h new file mode 100644 index 000000000000..8e51f595cd5b --- /dev/null +++ b/clang/include/clang/Tooling/Transformer/Parsing.h @@ -0,0 +1,41 @@ +//===--- Parsing.h - Parsing library for Transformer ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines parsing functions for Transformer types. +/// FIXME: Currently, only supports `RangeSelectors` but parsers for other +/// Transformer types are under development. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_REFACTOR_PARSING_H_ +#define LLVM_CLANG_TOOLING_REFACTOR_PARSING_H_ + +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Tooling/Transformer/RangeSelector.h" +#include "llvm/Support/Error.h" +#include <functional> +#include <string> + +namespace clang { +namespace transformer { + +/// Parses a string representation of a \c RangeSelector. The grammar of these +/// strings is closely based on the (sub)grammar of \c RangeSelectors as they'd +/// appear in C++ code. However, this language constrains the set of permissible +/// strings (for node ids) -- it does not support escapes in the +/// string. Additionally, the \c charRange combinator is not supported, because +/// there is no representation of values of type \c CharSourceRange in this +/// (little) language. +llvm::Expected<RangeSelector> parseRangeSelector(llvm::StringRef Input); + +} // namespace transformer +} // namespace clang + +#endif // LLVM_CLANG_TOOLING_REFACTOR_PARSING_H_ diff --git a/clang/lib/Tooling/Transformer/CMakeLists.txt b/clang/lib/Tooling/Transformer/CMakeLists.txt index 281af1007a65..150b71b1ffcd 100644 --- a/clang/lib/Tooling/Transformer/CMakeLists.txt +++ b/clang/lib/Tooling/Transformer/CMakeLists.txt @@ -4,6 +4,7 @@ set(LLVM_LINK_COMPONENTS ) add_clang_library(clangTransformer + Parsing.cpp RangeSelector.cpp RewriteRule.cpp SourceCode.cpp diff --git a/clang/lib/Tooling/Transformer/Parsing.cpp b/clang/lib/Tooling/Transformer/Parsing.cpp new file mode 100644 index 000000000000..1579115b9313 --- /dev/null +++ b/clang/lib/Tooling/Transformer/Parsing.cpp @@ -0,0 +1,279 @@ +//===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/Transformer/Parsing.h" +#include "clang/AST/Expr.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Lex/Lexer.h" +#include "clang/Tooling/Transformer/RangeSelector.h" +#include "clang/Tooling/Transformer/SourceCode.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include <string> +#include <utility> +#include <vector> + +using namespace clang; +using namespace transformer; + +// FIXME: This implementation is entirely separate from that of the AST +// matchers. Given the similarity of the languages and uses of the two parsers, +// the two should share a common parsing infrastructure, as should other +// Transformer types. We intend to unify this implementation soon to share as +// much as possible with the AST Matchers parsing. + +namespace { +using llvm::Error; +using llvm::Expected; + +template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...); + +struct ParseState { + // The remaining input to be processed. + StringRef Input; + // The original input. Not modified during parsing; only for reference in + // error reporting. + StringRef OriginalInput; +}; + +// Represents an intermediate result returned by a parsing function. Functions +// that don't generate values should use `llvm::None` +template <typename ResultType> struct ParseProgress { + ParseState State; + // Intermediate result generated by the Parser. 
+ ResultType Value; +}; + +template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>; +template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState); + +class ParseError : public llvm::ErrorInfo<ParseError> { +public: + // Required field for all ErrorInfo derivatives. + static char ID; + + ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt) + : Pos(Pos), ErrorMsg(std::move(ErrorMsg)), + Excerpt(std::move(InputExcerpt)) {} + + void log(llvm::raw_ostream &OS) const override { + OS << "parse error at position (" << Pos << "): " << ErrorMsg + << ": " + Excerpt; + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } + + // Position of the error in the input string. + size_t Pos; + std::string ErrorMsg; + // Excerpt of the input starting at the error position. + std::string Excerpt; +}; + +char ParseError::ID; +} // namespace + +static const llvm::StringMap<RangeSelectorOp<std::string>> & +getUnaryStringSelectors() { + static const llvm::StringMap<RangeSelectorOp<std::string>> M = { + {"name", name}, + {"node", node}, + {"statement", statement}, + {"statements", statements}, + {"member", member}, + {"callArgs", callArgs}, + {"elseBranch", elseBranch}, + {"initListElements", initListElements}}; + return M; +} + +static const llvm::StringMap<RangeSelectorOp<RangeSelector>> & +getUnaryRangeSelectors() { + static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = { + {"before", before}, {"after", after}, {"expansion", expansion}}; + return M; +} + +static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> & +getBinaryStringSelectors() { + static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = { + {"encloseNodes", range}}; + return M; +} + +static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> & +getBinaryRangeSelectors() { + static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> + M 
= {{"enclose", range}}; + return M; +} + +template <typename Element> +llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map, + llvm::StringRef Key) { + auto it = Map.find(Key); + if (it == Map.end()) + return llvm::None; + return it->second; +} + +template <typename ResultType> +ParseProgress<ResultType> makeParseProgress(ParseState State, + ResultType Result) { + return ParseProgress<ResultType>{State, std::move(Result)}; +} + +static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) { + size_t Pos = S.OriginalInput.size() - S.Input.size(); + return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg), + S.OriginalInput.substr(Pos, 20).str()); +} + +// Returns a new ParseState that advances \c S by \c N characters. +static ParseState advance(ParseState S, size_t N) { + S.Input = S.Input.drop_front(N); + return S; +} + +static StringRef consumeWhitespace(StringRef S) { + return S.drop_while([](char c) { return c >= 0 && isWhitespace(c); }); +} + +// Parses a single expected character \c c from \c State, skipping preceding +// whitespace. Error if the expected character isn't found. +static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) { + State.Input = consumeWhitespace(State.Input); + if (State.Input.empty() || State.Input.front() != c) + return makeParseError(State, + ("expected char not found: " + llvm::Twine(c)).str()); + return makeParseProgress(advance(State, 1), llvm::None); +} + +// Parses an identifier "token" -- handles preceding whitespace. +static ExpectedProgress<std::string> parseId(ParseState State) { + State.Input = consumeWhitespace(State.Input); + auto Id = State.Input.take_while( + [](char c) { return c >= 0 && isIdentifierBody(c); }); + if (Id.empty()) + return makeParseError(State, "failed to parse name"); + return makeParseProgress(advance(State, Id.size()), Id.str()); +} + +// For consistency with the AST matcher parser and C++ code, node ids are +// written as strings. 
However, we do not support escaping in the string. +static ExpectedProgress<std::string> parseStringId(ParseState State) { + State.Input = consumeWhitespace(State.Input); + if (State.Input.empty()) + return makeParseError(State, "unexpected end of input"); + if (!State.Input.consume_front("\"")) + return makeParseError( + State, + "expecting string, but encountered other character or end of input"); + + StringRef Id = State.Input.take_until([](char c) { return c == '"'; }); + if (State.Input.size() == Id.size()) + return makeParseError(State, "unterminated string"); + // Advance past the trailing quote as well. + return makeParseProgress(advance(State, Id.size() + 1), Id.str()); +} + +// Parses a single element surrounded by parens. `Op` is applied to the parsed +// result to create the result of this function call. +template <typename T> +ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement, + RangeSelectorOp<T> Op, + ParseState State) { + auto P = parseChar('(', State); + if (!P) + return P.takeError(); + + auto E = ParseElement(P->State); + if (!E) + return E.takeError(); + + P = parseChar(')', E->State); + if (!P) + return P.takeError(); + + return makeParseProgress(P->State, Op(std::move(E->Value))); +} + +// Parses a pair of elements surrounded by parens and separated by comma. `Op` +// is applied to the parsed results to create the result of this function call. 
+template <typename T> +ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement, + RangeSelectorOp<T, T> Op, + ParseState State) { + auto P = parseChar('(', State); + if (!P) + return P.takeError(); + + auto Left = ParseElement(P->State); + if (!Left) + return Left.takeError(); + + P = parseChar(',', Left->State); + if (!P) + return P.takeError(); + + auto Right = ParseElement(P->State); + if (!Right) + return Right.takeError(); + + P = parseChar(')', Right->State); + if (!P) + return P.takeError(); + + return makeParseProgress(P->State, + Op(std::move(Left->Value), std::move(Right->Value))); +} + +// Parses a range selector: an operator name followed by its parenthesized +// argument(s) -- string node ids or nested selectors, depending on the operator. +// Returns the parsed RangeSelector on success and a parse error otherwise. +static ExpectedProgress<RangeSelector> +parseRangeSelectorImpl(ParseState State) { + auto Id = parseId(State); + if (!Id) + return Id.takeError(); + + std::string OpName = std::move(Id->Value); + if (auto Op = findOptional(getUnaryStringSelectors(), OpName)) + return parseSingle(parseStringId, *Op, Id->State); + + if (auto Op = findOptional(getUnaryRangeSelectors(), OpName)) + return parseSingle(parseRangeSelectorImpl, *Op, Id->State); + + if (auto Op = findOptional(getBinaryStringSelectors(), OpName)) + return parsePair(parseStringId, *Op, Id->State); + + if (auto Op = findOptional(getBinaryRangeSelectors(), OpName)) + return parsePair(parseRangeSelectorImpl, *Op, Id->State); + + return makeParseError(State, "unknown selector name: " + OpName); +} + +Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) { + ParseState State = {Input, Input}; + ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State); + if (!Result) + return Result.takeError(); + State = Result->State; + // Discard any potentially trailing whitespace. 
+ State.Input = consumeWhitespace(State.Input); + if (State.Input.empty()) + return Result->Value; + return makeParseError(State, "unexpected input after selector"); +} diff --git a/clang/unittests/Tooling/RangeSelectorTest.cpp b/clang/unittests/Tooling/RangeSelectorTest.cpp index c4560b6be2fc..da5b3c524e4b 100644 --- a/clang/unittests/Tooling/RangeSelectorTest.cpp +++ b/clang/unittests/Tooling/RangeSelectorTest.cpp @@ -10,6 +10,7 @@ #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Frontend/ASTUnit.h" #include "clang/Tooling/Tooling.h" +#include "clang/Tooling/Transformer/Parsing.h" #include "clang/Tooling/Transformer/SourceCode.h" #include "llvm/Support/Error.h" #include "llvm/Testing/Support/Error.h" @@ -132,13 +133,36 @@ TEST(RangeSelectorTest, BeforeOp) { int f(int x, int y, int z) { return 3; } int g() { return f(/* comment */ 3, 7 /* comment */, 9); } )cc"; - const char *Call = "call"; - TestMatch Match = matchCode(Code, callExpr().bind(Call)); - const auto* E = Match.Result.Nodes.getNodeAs<Expr>(Call); + StringRef CallID = "call"; + ast_matchers::internal::Matcher<Stmt> M = callExpr().bind(CallID); + RangeSelector R = before(node(CallID.str())); + + TestMatch Match = matchCode(Code, M); + const auto *E = Match.Result.Nodes.getNodeAs<Expr>(CallID); assert(E != nullptr); auto ExprBegin = E->getSourceRange().getBegin(); EXPECT_THAT_EXPECTED( - before(node(Call))(Match.Result), + R(Match.Result), + HasValue(EqualsCharSourceRange( + CharSourceRange::getCharRange(ExprBegin, ExprBegin)))); +} + +TEST(RangeSelectorTest, BeforeOpParsed) { + StringRef Code = R"cc( + int f(int x, int y, int z) { return 3; } + int g() { return f(/* comment */ 3, 7 /* comment */, 9); } + )cc"; + StringRef CallID = "call"; + ast_matchers::internal::Matcher<Stmt> M = callExpr().bind(CallID); + auto R = parseRangeSelector(R"rs(before(node("call")))rs"); + ASSERT_THAT_EXPECTED(R, llvm::Succeeded()); + + TestMatch Match = matchCode(Code, M); + const auto *E = 
Match.Result.Nodes.getNodeAs<Expr>(CallID); + assert(E != nullptr); + auto ExprBegin = E->getSourceRange().getBegin(); + EXPECT_THAT_EXPECTED( + (*R)(Match.Result), HasValue(EqualsCharSourceRange( CharSourceRange::getCharRange(ExprBegin, ExprBegin)))); } @@ -169,45 +193,82 @@ TEST(RangeSelectorTest, AfterOp) { HasValue(EqualsCharSourceRange(ExpectedAfter))); } -TEST(RangeSelectorTest, RangeOp) { +// Node-id specific version. +TEST(RangeSelectorTest, RangeOpNodes) { StringRef Code = R"cc( int f(int x, int y, int z) { return 3; } int g() { return f(/* comment */ 3, 7 /* comment */, 9); } )cc"; - const char *Arg0 = "a0"; - const char *Arg1 = "a1"; - StringRef Call = "call"; - auto Matcher = callExpr(hasArgument(0, expr().bind(Arg0)), - hasArgument(1, expr().bind(Arg1))) - .bind(Call); + auto Matcher = callExpr(hasArgument(0, expr().bind("a0")), + hasArgument(1, expr().bind("a1"))); + RangeSelector R = range("a0", "a1"); + TestMatch Match = matchCode(Code, Matcher); + EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3, 7")); +} + +TEST(RangeSelectorTest, RangeOpGeneral) { + StringRef Code = R"cc( + int f(int x, int y, int z) { return 3; } + int g() { return f(/* comment */ 3, 7 /* comment */, 9); } + )cc"; + auto Matcher = callExpr(hasArgument(0, expr().bind("a0")), + hasArgument(1, expr().bind("a1"))); + RangeSelector R = range(node("a0"), node("a1")); TestMatch Match = matchCode(Code, Matcher); + EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3, 7")); +} - // Node-id specific version: - EXPECT_THAT_EXPECTED(select(range(Arg0, Arg1), Match), HasValue("3, 7")); - // General version: - EXPECT_THAT_EXPECTED(select(range(node(Arg0), node(Arg1)), Match), - HasValue("3, 7")); +TEST(RangeSelectorTest, RangeOpNodesParsed) { + StringRef Code = R"cc( + int f(int x, int y, int z) { return 3; } + int g() { return f(/* comment */ 3, 7 /* comment */, 9); } + )cc"; + auto Matcher = callExpr(hasArgument(0, expr().bind("a0")), + hasArgument(1, expr().bind("a1"))); + auto R = 
parseRangeSelector(R"rs(encloseNodes("a0", "a1"))rs"); + ASSERT_THAT_EXPECTED(R, llvm::Succeeded()); + TestMatch Match = matchCode(Code, Matcher); + EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7")); +} + +TEST(RangeSelectorTest, RangeOpGeneralParsed) { + StringRef Code = R"cc( + int f(int x, int y, int z) { return 3; } + int g() { return f(/* comment */ 3, 7 /* comment */, 9); } + )cc"; + auto Matcher = callExpr(hasArgument(0, expr().bind("a0")), + hasArgument(1, expr().bind("a1"))); + auto R = parseRangeSelector(R"rs(encloseNodes("a0", "a1"))rs"); + ASSERT_THAT_EXPECTED(R, llvm::Succeeded()); + TestMatch Match = matchCode(Code, Matcher); + EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7")); } TEST(RangeSelectorTest, NodeOpStatement) { StringRef Code = "int f() { return 3; }"; - const char *ID = "id"; - TestMatch Match = matchCode(Code, returnStmt().bind(ID)); - EXPECT_THAT_EXPECTED(select(node(ID), Match), HasValue("return 3;")); + TestMatch Match = matchCode(Code, returnStmt().bind("id")); + EXPECT_THAT_EXPECTED(select(node("id"), Match), HasValue("return 3;")); } TEST(RangeSelectorTest, NodeOpExpression) { StringRef Code = "int f() { return 3; }"; - const char *ID = "id"; - TestMatch Match = matchCode(Code, expr().bind(ID)); - EXPECT_THAT_EXPECTED(select(node(ID), Match), HasValue("3")); + TestMatch Match = matchCode(Code, expr().bind("id")); + EXPECT_THAT_EXPECTED(select(node("id"), Match), HasValue("3")); } TEST(RangeSelectorTest, StatementOp) { StringRef Code = "int f() { return 3; }"; - const char *ID = "id"; - TestMatch Match = matchCode(Code, expr().bind(ID)); - EXPECT_THAT_EXPECTED(select(statement(ID), Match), HasValue("3;")); + TestMatch Match = matchCode(Code, expr().bind("id")); + RangeSelector R = statement("id"); + EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3;")); +} + +TEST(RangeSelectorTest, StatementOpParsed) { + StringRef Code = "int f() { return 3; }"; + TestMatch Match = matchCode(Code, expr().bind("id")); + auto R = 
parseRangeSelector(R"rs(statement("id"))rs"); + ASSERT_THAT_EXPECTED(R, llvm::Succeeded()); + EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3;")); } TEST(RangeSelectorTest, MemberOp) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits