https://github.com/cmtice updated https://github.com/llvm/llvm-project/pull/123521
>From 468f73f8539dcb8addf8ed9618d9eb797dabbb01 Mon Sep 17 00:00:00 2001 From: Caroline Tice <cmt...@google.com> Date: Sun, 19 Jan 2025 09:15:34 -0800 Subject: [PATCH 1/5] [LLDB] Add Lexer (with tests) for DIL (Data Inspection Language). This adds the basic lexer, with unittests, for the Data Inspection Language (DIL) -- see https://discourse.llvm.org/t/rfc-data-inspection-language/69893 This version of the lexer only handles local variables and namespaces, and is designed to work with https://github.com/llvm/llvm-project/pull/120971. --- lldb/include/lldb/ValueObject/DILLexer.h | 156 ++++++++++++++ lldb/source/ValueObject/DILLexer.cpp | 205 +++++++++++++++++++ lldb/unittests/ValueObject/CMakeLists.txt | 1 + lldb/unittests/ValueObject/DILLexerTests.cpp | 193 +++++++++++++++++ 4 files changed, 555 insertions(+) create mode 100644 lldb/include/lldb/ValueObject/DILLexer.h create mode 100644 lldb/source/ValueObject/DILLexer.cpp create mode 100644 lldb/unittests/ValueObject/DILLexerTests.cpp diff --git a/lldb/include/lldb/ValueObject/DILLexer.h b/lldb/include/lldb/ValueObject/DILLexer.h new file mode 100644 index 00000000000000..45c506b2f4106d --- /dev/null +++ b/lldb/include/lldb/ValueObject/DILLexer.h @@ -0,0 +1,156 @@ +//===-- DILLexer.h ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_VALUEOBJECT_DILLEXER_H_ +#define LLDB_VALUEOBJECT_DILLEXER_H_ + +#include "llvm/ADT/StringRef.h" +#include <cstdint> +#include <limits.h> +#include <memory> +#include <string> +#include <vector> + +namespace lldb_private { + +namespace dil { + +enum class TokenKind { + coloncolon, + eof, + identifier, + invalid, + kw_namespace, + l_paren, + none, + r_paren, + unknown, +}; + +/// Class defining the tokens generated by the DIL lexer and used by the +/// DIL parser. +class DILToken { +public: + DILToken(dil::TokenKind kind, std::string spelling, uint32_t start) + : m_kind(kind), m_spelling(spelling), m_start_pos(start) {} + + DILToken() : m_kind(dil::TokenKind::none), m_spelling(""), m_start_pos(0) {} + + void setKind(dil::TokenKind kind) { m_kind = kind; } + dil::TokenKind getKind() const { return m_kind; } + + std::string getSpelling() const { return m_spelling; } + + uint32_t getLength() const { return m_spelling.size(); } + + bool is(dil::TokenKind kind) const { return m_kind == kind; } + + bool isNot(dil::TokenKind kind) const { return m_kind != kind; } + + bool isOneOf(dil::TokenKind kind1, dil::TokenKind kind2) const { + return is(kind1) || is(kind2); + } + + template <typename... Ts> bool isOneOf(dil::TokenKind kind, Ts... Ks) const { + return is(kind) || isOneOf(Ks...); + } + + uint32_t getLocation() const { return m_start_pos; } + + void setValues(dil::TokenKind kind, std::string spelling, uint32_t start) { + m_kind = kind; + m_spelling = spelling; + m_start_pos = start; + } + + static const std::string getTokenName(dil::TokenKind kind); + +private: + dil::TokenKind m_kind; + std::string m_spelling; + uint32_t m_start_pos; // within entire expression string +}; + +/// Class for doing the simple lexing required by DIL. 
+class DILLexer { +public: + DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr.str()) { + m_cur_pos = m_expr.begin(); + // Use UINT_MAX to indicate invalid/uninitialized value. + m_tokens_idx = UINT_MAX; + } + + bool Lex(DILToken &result, bool look_ahead = false); + + bool Is_Word(std::string::iterator start, uint32_t &length); + + uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); } + + /// Update 'result' with the other paremeter values, create a + /// duplicate token, and push the duplicate token onto the vector of + /// lexed tokens. + void UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind, + std::string tok_str, uint32_t tok_pos); + + /// Return the lexed token N+1 positions ahead of the 'current' token + /// being handled by the DIL parser. + const DILToken &LookAhead(uint32_t N); + + const DILToken &AcceptLookAhead(uint32_t N); + + /// Return the index for the 'current' token being handled by the DIL parser. + uint32_t GetCurrentTokenIdx() { return m_tokens_idx; } + + /// Return the current token to be handled by the DIL parser. + DILToken &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; } + + /// Update the index for the 'current' token, to point to the next lexed + /// token. + bool IncrementTokenIdx() { + if (m_tokens_idx >= m_lexed_tokens.size() - 1) + return false; + + m_tokens_idx++; + return true; + } + + /// Set the index for the 'current' token (to be handled by the parser) + /// to a particular position. Used for either committing 'look ahead' parsing + /// or rolling back tentative parsing. + bool ResetTokenIdx(uint32_t new_value) { + if (new_value > m_lexed_tokens.size() - 1) + return false; + + m_tokens_idx = new_value; + return true; + } + +private: + // The input string we are lexing & parsing. + std::string m_expr; + + // The current position of the lexer within m_expr (the character position, + // within the string, of the next item to be lexed). 
+ std::string::iterator m_cur_pos; + + // Holds all of the tokens lexed so far. + std::vector<DILToken> m_lexed_tokens; + + // Index into m_lexed_tokens; indicates which token the DIL parser is + // currently trying to parse/handle. + uint32_t m_tokens_idx; + + // "invalid" token; to be returned by lexer when 'look ahead' fails. + DILToken m_invalid_token; +}; + +} // namespace dil + +} // namespace lldb_private + +#endif // LLDB_VALUEOBJECT_DILLEXER_H_ diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp new file mode 100644 index 00000000000000..4c2b0b1813bb96 --- /dev/null +++ b/lldb/source/ValueObject/DILLexer.cpp @@ -0,0 +1,205 @@ +//===-- DILLexer.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// This implements the recursive descent parser for the Data Inspection +// Language (DIL), and its helper functions, which will eventually underlie the +// 'frame variable' command. The language that this parser recognizes is +// described in lldb/docs/dil-expr-lang.ebnf +// +//===----------------------------------------------------------------------===// + +#include "lldb/ValueObject/DILLexer.h" +#include "llvm/ADT/StringMap.h" + +namespace lldb_private { + +namespace dil { + +// For fast keyword lookup. More keywords will be added later. 
+const llvm::StringMap<dil::TokenKind> Keywords = { + {"namespace", dil::TokenKind::kw_namespace}, +}; + +const std::string DILToken::getTokenName(dil::TokenKind kind) { + switch (kind) { + case dil::TokenKind::coloncolon: + return "coloncolon"; + case dil::TokenKind::eof: + return "eof"; + case dil::TokenKind::identifier: + return "identifier"; + case dil::TokenKind::kw_namespace: + return "namespace"; + case dil::TokenKind::l_paren: + return "l_paren"; + case dil::TokenKind::r_paren: + return "r_paren"; + case dil::TokenKind::unknown: + return "unknown"; + default: + return "token_name"; + } +} + +static bool Is_Letter(char c) { + if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) + return true; + return false; +} + +static bool Is_Digit(char c) { return ('0' <= c && c <= '9'); } + +// A word starts with a letter, underscore, or dollar sign, followed by +// letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores. +bool DILLexer::Is_Word(std::string::iterator start, uint32_t &length) { + bool done = false; + bool dollar_start = false; + + // Must not start with a digit. + if (m_cur_pos == m_expr.end() || Is_Digit(*m_cur_pos)) + return false; + + // First character *may* be a '$', for a register name or convenience + // variable. + if (*m_cur_pos == '$') { + dollar_start = true; + ++m_cur_pos; + length++; + } + + // Contains only letters, digits or underscores + for (; m_cur_pos != m_expr.end() && !done; ++m_cur_pos) { + char c = *m_cur_pos; + if (!Is_Letter(c) && !Is_Digit(c) && c != '_') { + done = true; + break; + } else + length++; + } + + if (dollar_start && length > 1) // Must have something besides just '$' + return true; + + if (!dollar_start && length > 0) + return true; + + // Not a valid word, so re-set the lexing position. 
+ m_cur_pos = start; + return false; +} + +void DILLexer::UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind, + std::string tok_str, uint32_t tok_pos) { + DILToken new_token; + result.setValues(tok_kind, tok_str, tok_pos); + new_token = result; + m_lexed_tokens.push_back(std::move(new_token)); +} + +bool DILLexer::Lex(DILToken &result, bool look_ahead) { + bool retval = true; + + if (!look_ahead) { + // We're being asked for the 'next' token, and not a part of a LookAhead. + // Check to see if we've already lexed it and pushed it onto our tokens + // vector; if so, return the next token from the vector, rather than doing + // more lexing. + if ((m_tokens_idx != UINT_MAX) && + (m_tokens_idx < m_lexed_tokens.size() - 1)) { + result = m_lexed_tokens[m_tokens_idx + 1]; + return retval; + } + } + + // Skip over whitespace (spaces). + while (m_cur_pos != m_expr.end() && *m_cur_pos == ' ') + m_cur_pos++; + + // Check to see if we've reached the end of our input string. + if (m_cur_pos == m_expr.end()) { + UpdateLexedTokens(result, dil::TokenKind::eof, "", m_expr.length()); + return retval; + } + + uint32_t position = m_cur_pos - m_expr.begin(); + ; + std::string::iterator start = m_cur_pos; + uint32_t length = 0; + if (Is_Word(start, length)) { + dil::TokenKind kind; + std::string word = m_expr.substr(position, length); + auto iter = Keywords.find(word); + if (iter != Keywords.end()) + kind = iter->second; + else + kind = dil::TokenKind::identifier; + + UpdateLexedTokens(result, kind, word, position); + return true; + } + + switch (*m_cur_pos) { + case '(': + m_cur_pos++; + UpdateLexedTokens(result, dil::TokenKind::l_paren, "(", position); + return true; + case ')': + m_cur_pos++; + UpdateLexedTokens(result, dil::TokenKind::r_paren, ")", position); + return true; + case ':': + if (position + 1 < m_expr.size() && m_expr[position + 1] == ':') { + m_cur_pos += 2; + UpdateLexedTokens(result, dil::TokenKind::coloncolon, "::", position); + return true; + } + break; + 
default: + break; + } + // Empty Token + result.setValues(dil::TokenKind::none, "", m_expr.length()); + return false; +} + +const DILToken &DILLexer::LookAhead(uint32_t N) { + uint32_t extra_lexed_tokens = m_lexed_tokens.size() - m_tokens_idx - 1; + + if (N + 1 < extra_lexed_tokens) + return m_lexed_tokens[m_tokens_idx + N + 1]; + + uint32_t remaining_tokens = + (m_tokens_idx + N + 1) - m_lexed_tokens.size() + 1; + + bool done = false; + bool look_ahead = true; + while (!done && remaining_tokens > 0) { + DILToken tok; + Lex(tok, look_ahead); + if (tok.getKind() == dil::TokenKind::eof) + done = true; + remaining_tokens--; + }; + + if (remaining_tokens > 0) { + m_invalid_token.setValues(dil::TokenKind::invalid, "", 0); + return m_invalid_token; + } + + return m_lexed_tokens[m_tokens_idx + N + 1]; +} + +const DILToken &DILLexer::AcceptLookAhead(uint32_t N) { + if (m_tokens_idx + N + 1 > m_lexed_tokens.size()) + return m_invalid_token; + + m_tokens_idx += N + 1; + return m_lexed_tokens[m_tokens_idx]; +} + +} // namespace dil + +} // namespace lldb_private diff --git a/lldb/unittests/ValueObject/CMakeLists.txt b/lldb/unittests/ValueObject/CMakeLists.txt index 8fcc8d62a79979..952f5411a98057 100644 --- a/lldb/unittests/ValueObject/CMakeLists.txt +++ b/lldb/unittests/ValueObject/CMakeLists.txt @@ -1,5 +1,6 @@ add_lldb_unittest(LLDBValueObjectTests DumpValueObjectOptionsTests.cpp + DILLexerTests.cpp LINK_LIBS lldbValueObject diff --git a/lldb/unittests/ValueObject/DILLexerTests.cpp b/lldb/unittests/ValueObject/DILLexerTests.cpp new file mode 100644 index 00000000000000..ec6ff86b64d36b --- /dev/null +++ b/lldb/unittests/ValueObject/DILLexerTests.cpp @@ -0,0 +1,193 @@ +//===-- DILLexerTests.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/ValueObject/DILLexer.h" +#include "llvm/ADT/StringRef.h" +#include "gtest/gtest.h" +#include <string> + +using llvm::StringRef; + +TEST(DILLexerTests, SimpleTest) { + StringRef dil_input_expr("simple_var"); + uint32_t tok_len = 10; + lldb_private::dil::DILLexer dil_lexer(dil_input_expr); + lldb_private::dil::DILToken dil_token; + dil_token.setKind(lldb_private::dil::TokenKind::unknown); + EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::unknown); + dil_lexer.Lex(dil_token); + EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); + EXPECT_EQ(dil_token.getSpelling(), "simple_var"); + EXPECT_EQ(dil_token.getLength(), tok_len); + dil_lexer.Lex(dil_token); + EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::eof); +} + +TEST(DILLexerTests, TokenKindTest) { + StringRef dil_input_expr("namespace"); + lldb_private::dil::DILLexer dil_lexer(dil_input_expr); + lldb_private::dil::DILToken dil_token; + dil_token.setKind(lldb_private::dil::TokenKind::unknown); + + dil_lexer.Lex(dil_token); + EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), UINT_MAX); + dil_lexer.ResetTokenIdx(0); + + EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::kw_namespace); + EXPECT_TRUE(dil_token.isNot(lldb_private::dil::TokenKind::identifier)); + EXPECT_FALSE(dil_token.is(lldb_private::dil::TokenKind::l_paren)); + EXPECT_TRUE(dil_token.isOneOf(lldb_private::dil::TokenKind::eof, + lldb_private::dil::TokenKind::kw_namespace)); + EXPECT_FALSE(dil_token.isOneOf(lldb_private::dil::TokenKind::l_paren, + lldb_private::dil::TokenKind::r_paren, + lldb_private::dil::TokenKind::coloncolon, + lldb_private::dil::TokenKind::eof)); + + dil_token.setKind(lldb_private::dil::TokenKind::identifier); + EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); +} + +TEST(DILLexerTests, LookAheadTest) { 
+ StringRef dil_input_expr("(anonymous namespace)::some_var"); + lldb_private::dil::DILLexer dil_lexer(dil_input_expr); + lldb_private::dil::DILToken dil_token; + dil_token.setKind(lldb_private::dil::TokenKind::unknown); + uint32_t expect_loc = 23; + + dil_lexer.Lex(dil_token); + EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), UINT_MAX); + dil_lexer.ResetTokenIdx(0); + + // Current token is '('; check the next 4 tokens, to make + // sure they are the identifier 'anonymous', the namespace keyword, + // ')' and '::', in that order. + EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::l_paren); + EXPECT_EQ(dil_lexer.LookAhead(0).getKind(), + lldb_private::dil::TokenKind::identifier); + EXPECT_EQ(dil_lexer.LookAhead(0).getSpelling(), "anonymous"); + EXPECT_EQ(dil_lexer.LookAhead(1).getKind(), + lldb_private::dil::TokenKind::kw_namespace); + EXPECT_EQ(dil_lexer.LookAhead(2).getKind(), + lldb_private::dil::TokenKind::r_paren); + EXPECT_EQ(dil_lexer.LookAhead(3).getKind(), + lldb_private::dil::TokenKind::coloncolon); + // Verify we've advanced our position counter (lexing location) in the + // input 23 characters (the length of '(anonymous namespace)::'. + EXPECT_EQ(dil_lexer.GetLocation(), expect_loc); + + // Our current index should still be 0, as we only looked ahead; we are still + // officially on the '('. + EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), 0); + + // Accept the 'lookahead', so our current token is '::', which has the index + // 4 in our vector of tokens (which starts at zero). 
+ dil_token = dil_lexer.AcceptLookAhead(3); + EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::coloncolon); + EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), 4); + + // Lex the final variable name in the input string + dil_lexer.Lex(dil_token); + dil_lexer.IncrementTokenIdx(); + EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); + EXPECT_EQ(dil_token.getSpelling(), "some_var"); + EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), 5); + + dil_lexer.Lex(dil_token); + EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::eof); +} + +TEST(DILLexerTests, MultiTokenLexTest) { + StringRef dil_input_expr("This string has several identifiers"); + lldb_private::dil::DILLexer dil_lexer(dil_input_expr); + lldb_private::dil::DILToken dil_token; + dil_token.setKind(lldb_private::dil::TokenKind::unknown); + + dil_lexer.Lex(dil_token); + EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), UINT_MAX); + dil_lexer.ResetTokenIdx(0); + + EXPECT_EQ(dil_token.getSpelling(), "This"); + EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); + dil_lexer.Lex(dil_token); + dil_lexer.IncrementTokenIdx(); + + EXPECT_EQ(dil_token.getSpelling(), "string"); + EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); + dil_lexer.Lex(dil_token); + dil_lexer.IncrementTokenIdx(); + + EXPECT_EQ(dil_token.getSpelling(), "has"); + EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); + dil_lexer.Lex(dil_token); + dil_lexer.IncrementTokenIdx(); + + EXPECT_EQ(dil_token.getSpelling(), "several"); + EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); + dil_lexer.Lex(dil_token); + dil_lexer.IncrementTokenIdx(); + + EXPECT_EQ(dil_token.getSpelling(), "identifiers"); + EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); + dil_lexer.Lex(dil_token); + dil_lexer.IncrementTokenIdx(); + + EXPECT_EQ(dil_token.getSpelling(), ""); + EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::eof); +} + 
+TEST(DILLexerTests, IdentifiersTest) { + std::vector<std::string> valid_identifiers = { + "$My_name1", + "$pc", + "abcd", + "ab cd", + "_", + "_a", + "_a_", + "a_b", + "this", + "self", + "a", + "MyName" + }; + std::vector<std::string> invalid_identifiers = { + "234", + "2a", + "2", + "$", + "1MyName", + "", + "namespace" + }; + + // Verify that all of the valid identifiers come out as identifier tokens. + for (auto str : valid_identifiers) { + StringRef dil_input_expr(str); + lldb_private::dil::DILLexer dil_lexer(dil_input_expr); + lldb_private::dil::DILToken dil_token; + dil_token.setKind(lldb_private::dil::TokenKind::unknown); + + dil_lexer.Lex(dil_token); + EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); + } + + // Verify that none of the invalid identifiers come out as identifier tokens. + for (auto str : invalid_identifiers) { + StringRef dil_input_expr(str); + lldb_private::dil::DILLexer dil_lexer(dil_input_expr); + lldb_private::dil::DILToken dil_token; + dil_token.setKind(lldb_private::dil::TokenKind::unknown); + + dil_lexer.Lex(dil_token); + EXPECT_TRUE(dil_token.isNot(lldb_private::dil::TokenKind::identifier)); + EXPECT_TRUE(dil_token.isOneOf(lldb_private::dil::TokenKind::unknown, + lldb_private::dil::TokenKind::none, + lldb_private::dil::TokenKind::eof, + lldb_private::dil::TokenKind::kw_namespace)); + } +} >From 61a2607a70d90688d395321e846a3be58ccbebcb Mon Sep 17 00:00:00 2001 From: Caroline Tice <cmt...@google.com> Date: Sun, 19 Jan 2025 09:22:51 -0800 Subject: [PATCH 2/5] [LLDB] Add Lexer (with tests) for DIL (Data Inspection Language) Update CMakeLists.txt to build DILLexer.cpp. 
--- lldb/source/ValueObject/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/source/ValueObject/CMakeLists.txt b/lldb/source/ValueObject/CMakeLists.txt index 70cb3d6d53f071..30c34472289e7b 100644 --- a/lldb/source/ValueObject/CMakeLists.txt +++ b/lldb/source/ValueObject/CMakeLists.txt @@ -1,4 +1,5 @@ add_lldb_library(lldbValueObject + DILLexer.cpp ValueObject.cpp ValueObjectCast.cpp ValueObjectChild.cpp >From 5e2ee55f800726910ad6e56a192554375f61bfb8 Mon Sep 17 00:00:00 2001 From: Caroline Tice <cmt...@google.com> Date: Sat, 25 Jan 2025 16:56:30 -0800 Subject: [PATCH 3/5] Many changes, to address all the review comments: - Remove "DIL" prefix from DILTokenKind and DILToken. - Change the token kind from an enum class to an enum inside the Token class. - Use CamelCase for all the method names. - Replace Token::SetValues method with assignments. - Use a StringRef, not std::string, to hold the input string in the lexer. - Update the lexer to lex all the tokens at one time. Added two new methods for this: LexAll and GetNextToken. - Made some of the Lexer methods private. - Replaced StringMap with StringSwitch for fast keyword lookups. - Updated GetTokenName to directly return StringRefs; removed default case from switch statement. - Cleaned up code format in IsLetter & IsDigit. - Updated IsWord to return an iterator range containing the word (if any). - Updated Lex function (now called by LexAll) to return an llvm::Expected token; removed look_ahead checks; changed the operator lexing to use a vector of operators (as suggested). - Cleaned up LookAhead method, now that we know all tokens have already been lexed. - Added helper function to unittests, to help check a sequence of tokens. - Generally cleaned up the tests to deal with all the code changes. 
--- lldb/include/lldb/ValueObject/DILLexer.h | 110 ++++---- lldb/source/ValueObject/DILLexer.cpp | 214 +++++++-------- lldb/unittests/ValueObject/DILLexerTests.cpp | 269 ++++++++++--------- 3 files changed, 302 insertions(+), 291 deletions(-) diff --git a/lldb/include/lldb/ValueObject/DILLexer.h b/lldb/include/lldb/ValueObject/DILLexer.h index 45c506b2f4106d..61e5fe622e51e6 100644 --- a/lldb/include/lldb/ValueObject/DILLexer.h +++ b/lldb/include/lldb/ValueObject/DILLexer.h @@ -10,6 +10,8 @@ #define LLDB_VALUEOBJECT_DILLEXER_H_ #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Error.h" #include <cstdint> #include <limits.h> #include <memory> @@ -20,58 +22,51 @@ namespace lldb_private { namespace dil { -enum class TokenKind { - coloncolon, - eof, - identifier, - invalid, - kw_namespace, - l_paren, - none, - r_paren, - unknown, -}; - /// Class defining the tokens generated by the DIL lexer and used by the /// DIL parser. -class DILToken { +class Token { public: - DILToken(dil::TokenKind kind, std::string spelling, uint32_t start) + enum Kind { + coloncolon, + eof, + identifier, + invalid, + kw_namespace, + l_paren, + none, + r_paren, + unknown, + }; + + Token(Kind kind, std::string spelling, uint32_t start) : m_kind(kind), m_spelling(spelling), m_start_pos(start) {} - DILToken() : m_kind(dil::TokenKind::none), m_spelling(""), m_start_pos(0) {} + Token() : m_kind(Kind::none), m_spelling(""), m_start_pos(0) {} - void setKind(dil::TokenKind kind) { m_kind = kind; } - dil::TokenKind getKind() const { return m_kind; } + void SetKind(Kind kind) { m_kind = kind; } - std::string getSpelling() const { return m_spelling; } + Kind GetKind() const { return m_kind; } - uint32_t getLength() const { return m_spelling.size(); } + std::string GetSpelling() const { return m_spelling; } - bool is(dil::TokenKind kind) const { return m_kind == kind; } + uint32_t GetLength() const { return m_spelling.size(); } - bool isNot(dil::TokenKind kind) 
const { return m_kind != kind; } + bool Is(Kind kind) const { return m_kind == kind; } - bool isOneOf(dil::TokenKind kind1, dil::TokenKind kind2) const { - return is(kind1) || is(kind2); - } + bool IsNot(Kind kind) const { return m_kind != kind; } - template <typename... Ts> bool isOneOf(dil::TokenKind kind, Ts... Ks) const { - return is(kind) || isOneOf(Ks...); - } + bool IsOneOf(Kind kind1, Kind kind2) const { return Is(kind1) || Is(kind2); } - uint32_t getLocation() const { return m_start_pos; } - - void setValues(dil::TokenKind kind, std::string spelling, uint32_t start) { - m_kind = kind; - m_spelling = spelling; - m_start_pos = start; + template <typename... Ts> bool IsOneOf(Kind kind, Ts... Ks) const { + return Is(kind) || IsOneOf(Ks...); } - static const std::string getTokenName(dil::TokenKind kind); + uint32_t GetLocation() const { return m_start_pos; } + + static llvm::StringRef GetTokenName(Kind kind); private: - dil::TokenKind m_kind; + Kind m_kind; std::string m_spelling; uint32_t m_start_pos; // within entire expression string }; @@ -79,35 +74,30 @@ class DILToken { /// Class for doing the simple lexing required by DIL. class DILLexer { public: - DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr.str()) { + DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr) { m_cur_pos = m_expr.begin(); // Use UINT_MAX to indicate invalid/uninitialized value. m_tokens_idx = UINT_MAX; + m_invalid_token = Token(Token::invalid, "", 0); } - bool Lex(DILToken &result, bool look_ahead = false); - - bool Is_Word(std::string::iterator start, uint32_t &length); - - uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); } - - /// Update 'result' with the other paremeter values, create a - /// duplicate token, and push the duplicate token onto the vector of - /// lexed tokens. 
- void UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind, - std::string tok_str, uint32_t tok_pos); + llvm::Expected<bool> LexAll(); /// Return the lexed token N+1 positions ahead of the 'current' token /// being handled by the DIL parser. - const DILToken &LookAhead(uint32_t N); + const Token &LookAhead(uint32_t N); + + const Token &AcceptLookAhead(uint32_t N); - const DILToken &AcceptLookAhead(uint32_t N); + const Token &GetNextToken(); /// Return the index for the 'current' token being handled by the DIL parser. uint32_t GetCurrentTokenIdx() { return m_tokens_idx; } /// Return the current token to be handled by the DIL parser. - DILToken &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; } + const Token &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; } + + uint32_t NumLexedTokens() { return m_lexed_tokens.size(); } /// Update the index for the 'current' token, to point to the next lexed /// token. @@ -130,23 +120,35 @@ class DILLexer { return true; } + uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); } + private: + llvm::Expected<Token> Lex(); + + llvm::iterator_range<llvm::StringRef::iterator> IsWord(); + + /// Update 'result' with the other paremeter values, create a + /// duplicate token, and push the duplicate token onto the vector of + /// lexed tokens. + void UpdateLexedTokens(Token &result, Token::Kind tok_kind, + std::string tok_str, uint32_t tok_pos); + // The input string we are lexing & parsing. - std::string m_expr; + llvm::StringRef m_expr; // The current position of the lexer within m_expr (the character position, // within the string, of the next item to be lexed). - std::string::iterator m_cur_pos; + llvm::StringRef::iterator m_cur_pos; // Holds all of the tokens lexed so far. - std::vector<DILToken> m_lexed_tokens; + std::vector<Token> m_lexed_tokens; // Index into m_lexed_tokens; indicates which token the DIL parser is // currently trying to parse/handle. 
uint32_t m_tokens_idx; // "invalid" token; to be returned by lexer when 'look ahead' fails. - DILToken m_invalid_token; + Token m_invalid_token; }; } // namespace dil diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp index 4c2b0b1813bb96..30e4bcb04e6505 100644 --- a/lldb/source/ValueObject/DILLexer.cpp +++ b/lldb/source/ValueObject/DILLexer.cpp @@ -12,107 +12,99 @@ //===----------------------------------------------------------------------===// #include "lldb/ValueObject/DILLexer.h" -#include "llvm/ADT/StringMap.h" +#include "lldb/Utility/Status.h" +#include "llvm/ADT/StringSwitch.h" namespace lldb_private { namespace dil { -// For fast keyword lookup. More keywords will be added later. -const llvm::StringMap<dil::TokenKind> Keywords = { - {"namespace", dil::TokenKind::kw_namespace}, -}; - -const std::string DILToken::getTokenName(dil::TokenKind kind) { +llvm::StringRef Token::GetTokenName(Kind kind) { switch (kind) { - case dil::TokenKind::coloncolon: + case Kind::coloncolon: return "coloncolon"; - case dil::TokenKind::eof: + case Kind::eof: return "eof"; - case dil::TokenKind::identifier: + case Kind::identifier: return "identifier"; - case dil::TokenKind::kw_namespace: + case Kind::invalid: + return "invalid"; + case Kind::kw_namespace: return "namespace"; - case dil::TokenKind::l_paren: + case Kind::l_paren: return "l_paren"; - case dil::TokenKind::r_paren: + case Kind::none: + return "none"; + case Kind::r_paren: return "r_paren"; - case dil::TokenKind::unknown: + case Kind::unknown: return "unknown"; - default: - return "token_name"; } } -static bool Is_Letter(char c) { - if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) - return true; - return false; +static bool IsLetter(char c) { + return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'); } -static bool Is_Digit(char c) { return ('0' <= c && c <= '9'); } +static bool IsDigit(char c) { return '0' <= c && c <= '9'; } // A word starts with a letter, underscore, or 
dollar sign, followed by // letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores. -bool DILLexer::Is_Word(std::string::iterator start, uint32_t &length) { - bool done = false; +llvm::iterator_range<llvm::StringRef::iterator> DILLexer::IsWord() { + llvm::StringRef::iterator start = m_cur_pos; bool dollar_start = false; // Must not start with a digit. - if (m_cur_pos == m_expr.end() || Is_Digit(*m_cur_pos)) - return false; + if (m_cur_pos == m_expr.end() || IsDigit(*m_cur_pos)) + return llvm::make_range(m_cur_pos, m_cur_pos); // First character *may* be a '$', for a register name or convenience // variable. if (*m_cur_pos == '$') { dollar_start = true; ++m_cur_pos; - length++; } // Contains only letters, digits or underscores - for (; m_cur_pos != m_expr.end() && !done; ++m_cur_pos) { + for (; m_cur_pos != m_expr.end(); ++m_cur_pos) { char c = *m_cur_pos; - if (!Is_Letter(c) && !Is_Digit(c) && c != '_') { - done = true; + if (!IsLetter(c) && !IsDigit(c) && c != '_') break; - } else - length++; } - if (dollar_start && length > 1) // Must have something besides just '$' - return true; - - if (!dollar_start && length > 0) - return true; + // If first char is '$', make sure there's at least one mare char, or it's + // invalid. + if (dollar_start && (m_cur_pos - start <= 1)) { + m_cur_pos = start; + return llvm::make_range(start, start); // Empty range + } - // Not a valid word, so re-set the lexing position. 
- m_cur_pos = start; - return false; + return llvm::make_range(start, m_cur_pos); } -void DILLexer::UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind, +void DILLexer::UpdateLexedTokens(Token &result, Token::Kind tok_kind, std::string tok_str, uint32_t tok_pos) { - DILToken new_token; - result.setValues(tok_kind, tok_str, tok_pos); - new_token = result; + Token new_token(tok_kind, tok_str, tok_pos); + result = new_token; m_lexed_tokens.push_back(std::move(new_token)); } -bool DILLexer::Lex(DILToken &result, bool look_ahead) { - bool retval = true; - - if (!look_ahead) { - // We're being asked for the 'next' token, and not a part of a LookAhead. - // Check to see if we've already lexed it and pushed it onto our tokens - // vector; if so, return the next token from the vector, rather than doing - // more lexing. - if ((m_tokens_idx != UINT_MAX) && - (m_tokens_idx < m_lexed_tokens.size() - 1)) { - result = m_lexed_tokens[m_tokens_idx + 1]; - return retval; +llvm::Expected<bool> DILLexer::LexAll() { + bool done = false; + while (!done) { + auto tok_or_err = Lex(); + if (!tok_or_err) + return tok_or_err.takeError(); + Token token = *tok_or_err; + if (token.GetKind() == Token::eof) { + done = true; } } + return true; +} + +llvm::Expected<Token> DILLexer::Lex() { + Token result; // Skip over whitespace (spaces). while (m_cur_pos != m_expr.end() && *m_cur_pos == ' ') @@ -120,79 +112,52 @@ bool DILLexer::Lex(DILToken &result, bool look_ahead) { // Check to see if we've reached the end of our input string. 
if (m_cur_pos == m_expr.end()) { - UpdateLexedTokens(result, dil::TokenKind::eof, "", m_expr.length()); - return retval; + UpdateLexedTokens(result, Token::eof, "", (uint32_t)m_expr.size()); + return result; } uint32_t position = m_cur_pos - m_expr.begin(); - ; - std::string::iterator start = m_cur_pos; - uint32_t length = 0; - if (Is_Word(start, length)) { - dil::TokenKind kind; - std::string word = m_expr.substr(position, length); - auto iter = Keywords.find(word); - if (iter != Keywords.end()) - kind = iter->second; - else - kind = dil::TokenKind::identifier; - - UpdateLexedTokens(result, kind, word, position); - return true; + llvm::StringRef::iterator start = m_cur_pos; + llvm::iterator_range<llvm::StringRef::iterator> word_range = IsWord(); + if (!word_range.empty()) { + uint32_t length = word_range.end() - word_range.begin(); + llvm::StringRef word(m_expr.substr(position, length)); + // We will be adding more keywords here in the future... + Token::Kind kind = llvm::StringSwitch<Token::Kind>(word) + .Case("namespace", Token::kw_namespace) + .Default(Token::identifier); + UpdateLexedTokens(result, kind, word.str(), position); + return result; } - switch (*m_cur_pos) { - case '(': - m_cur_pos++; - UpdateLexedTokens(result, dil::TokenKind::l_paren, "(", position); - return true; - case ')': - m_cur_pos++; - UpdateLexedTokens(result, dil::TokenKind::r_paren, ")", position); - return true; - case ':': - if (position + 1 < m_expr.size() && m_expr[position + 1] == ':') { - m_cur_pos += 2; - UpdateLexedTokens(result, dil::TokenKind::coloncolon, "::", position); - return true; + m_cur_pos = start; + llvm::StringRef remainder(m_expr.substr(position, m_expr.end() - m_cur_pos)); + std::vector<std::pair<Token::Kind, const char *>> operators = { + {Token::l_paren, "("}, + {Token::r_paren, ")"}, + {Token::coloncolon, "::"}, + }; + for (auto [kind, str] : operators) { + if (remainder.consume_front(str)) { + m_cur_pos += strlen(str); + UpdateLexedTokens(result, kind, str, 
position); + return result; } - break; - default: - break; } - // Empty Token - result.setValues(dil::TokenKind::none, "", m_expr.length()); - return false; -} -const DILToken &DILLexer::LookAhead(uint32_t N) { - uint32_t extra_lexed_tokens = m_lexed_tokens.size() - m_tokens_idx - 1; + // Unrecognized character(s) in string; unable to lex it. + Status error("Unable to lex input string"); + return error.ToError(); +} - if (N + 1 < extra_lexed_tokens) +const Token &DILLexer::LookAhead(uint32_t N) { + if (m_tokens_idx + N + 1 < m_lexed_tokens.size()) return m_lexed_tokens[m_tokens_idx + N + 1]; - uint32_t remaining_tokens = - (m_tokens_idx + N + 1) - m_lexed_tokens.size() + 1; - - bool done = false; - bool look_ahead = true; - while (!done && remaining_tokens > 0) { - DILToken tok; - Lex(tok, look_ahead); - if (tok.getKind() == dil::TokenKind::eof) - done = true; - remaining_tokens--; - }; - - if (remaining_tokens > 0) { - m_invalid_token.setValues(dil::TokenKind::invalid, "", 0); - return m_invalid_token; - } - - return m_lexed_tokens[m_tokens_idx + N + 1]; + return m_invalid_token; } -const DILToken &DILLexer::AcceptLookAhead(uint32_t N) { +const Token &DILLexer::AcceptLookAhead(uint32_t N) { if (m_tokens_idx + N + 1 > m_lexed_tokens.size()) return m_invalid_token; @@ -200,6 +165,25 @@ const DILToken &DILLexer::AcceptLookAhead(uint32_t N) { return m_lexed_tokens[m_tokens_idx]; } +const Token &DILLexer::GetNextToken() { + if (m_tokens_idx == UINT_MAX) + m_tokens_idx = 0; + else + m_tokens_idx++; + + // Return the next token in the vector of lexed tokens. + if (m_tokens_idx < m_lexed_tokens.size()) + return m_lexed_tokens[m_tokens_idx]; + + // We're already at/beyond the end of our lexed tokens. If the last token + // is an eof token, return it. + if (m_lexed_tokens[m_lexed_tokens.size() - 1].GetKind() == Token::eof) + return m_lexed_tokens[m_lexed_tokens.size() - 1]; + + // Return the invalid token. 
+ return m_invalid_token; +} + } // namespace dil } // namespace lldb_private diff --git a/lldb/unittests/ValueObject/DILLexerTests.cpp b/lldb/unittests/ValueObject/DILLexerTests.cpp index ec6ff86b64d36b..137013e40d6adf 100644 --- a/lldb/unittests/ValueObject/DILLexerTests.cpp +++ b/lldb/unittests/ValueObject/DILLexerTests.cpp @@ -13,131 +13,145 @@ using llvm::StringRef; +bool VerifyExpectedTokens( + lldb_private::dil::DILLexer &lexer, + std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>> + exp_tokens, + uint32_t start_pos) { + if (lexer.NumLexedTokens() - start_pos < exp_tokens.size()) + return false; + + if (start_pos > 0) + lexer.ResetTokenIdx(start_pos - + 1); // GetNextToken increments the idx first. + for (const auto &pair : exp_tokens) { + lldb_private::dil::Token token = lexer.GetNextToken(); + if (token.GetKind() != pair.first || token.GetSpelling() != pair.second) + return false; + } + + return true; +} + TEST(DILLexerTests, SimpleTest) { - StringRef dil_input_expr("simple_var"); + StringRef input_expr("simple_var"); uint32_t tok_len = 10; - lldb_private::dil::DILLexer dil_lexer(dil_input_expr); - lldb_private::dil::DILToken dil_token; - dil_token.setKind(lldb_private::dil::TokenKind::unknown); - EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::unknown); - dil_lexer.Lex(dil_token); - EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); - EXPECT_EQ(dil_token.getSpelling(), "simple_var"); - EXPECT_EQ(dil_token.getLength(), tok_len); - dil_lexer.Lex(dil_token); - EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::eof); + lldb_private::dil::DILLexer lexer(input_expr); + lldb_private::dil::Token token; + token.SetKind(lldb_private::dil::Token::unknown); + EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::unknown); + auto success = lexer.LexAll(); + + if (!success) { + EXPECT_TRUE(false); + } + token = lexer.GetNextToken(); + EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier); + 
EXPECT_EQ(token.GetSpelling(), "simple_var"); + EXPECT_EQ(token.GetLength(), tok_len); + token = lexer.GetNextToken(); + ; + EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof); } TEST(DILLexerTests, TokenKindTest) { - StringRef dil_input_expr("namespace"); - lldb_private::dil::DILLexer dil_lexer(dil_input_expr); - lldb_private::dil::DILToken dil_token; - dil_token.setKind(lldb_private::dil::TokenKind::unknown); - - dil_lexer.Lex(dil_token); - EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), UINT_MAX); - dil_lexer.ResetTokenIdx(0); - - EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::kw_namespace); - EXPECT_TRUE(dil_token.isNot(lldb_private::dil::TokenKind::identifier)); - EXPECT_FALSE(dil_token.is(lldb_private::dil::TokenKind::l_paren)); - EXPECT_TRUE(dil_token.isOneOf(lldb_private::dil::TokenKind::eof, - lldb_private::dil::TokenKind::kw_namespace)); - EXPECT_FALSE(dil_token.isOneOf(lldb_private::dil::TokenKind::l_paren, - lldb_private::dil::TokenKind::r_paren, - lldb_private::dil::TokenKind::coloncolon, - lldb_private::dil::TokenKind::eof)); - - dil_token.setKind(lldb_private::dil::TokenKind::identifier); - EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); + StringRef input_expr("namespace"); + lldb_private::dil::DILLexer lexer(input_expr); + lldb_private::dil::Token token; + token.SetKind(lldb_private::dil::Token::unknown); + + auto success = lexer.LexAll(); + if (!success) { + EXPECT_TRUE(false); + } + token = lexer.GetNextToken(); + + EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::kw_namespace); + EXPECT_TRUE(token.IsNot(lldb_private::dil::Token::identifier)); + EXPECT_FALSE(token.Is(lldb_private::dil::Token::l_paren)); + EXPECT_TRUE(token.IsOneOf(lldb_private::dil::Token::eof, + lldb_private::dil::Token::kw_namespace)); + EXPECT_FALSE(token.IsOneOf( + lldb_private::dil::Token::l_paren, lldb_private::dil::Token::r_paren, + lldb_private::dil::Token::coloncolon, lldb_private::dil::Token::eof)); + + 
token.SetKind(lldb_private::dil::Token::identifier); + EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier); } TEST(DILLexerTests, LookAheadTest) { - StringRef dil_input_expr("(anonymous namespace)::some_var"); - lldb_private::dil::DILLexer dil_lexer(dil_input_expr); - lldb_private::dil::DILToken dil_token; - dil_token.setKind(lldb_private::dil::TokenKind::unknown); + StringRef input_expr("(anonymous namespace)::some_var"); + lldb_private::dil::DILLexer lexer(input_expr); + lldb_private::dil::Token token; + token.SetKind(lldb_private::dil::Token::unknown); uint32_t expect_loc = 23; - dil_lexer.Lex(dil_token); - EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), UINT_MAX); - dil_lexer.ResetTokenIdx(0); + auto success = lexer.LexAll(); + if (!success) { + EXPECT_TRUE(false); + } + token = lexer.GetNextToken(); // Current token is '('; check the next 4 tokens, to make // sure they are the identifier 'anonymous', the namespace keyword, // ')' and '::', in that order. - EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::l_paren); - EXPECT_EQ(dil_lexer.LookAhead(0).getKind(), - lldb_private::dil::TokenKind::identifier); - EXPECT_EQ(dil_lexer.LookAhead(0).getSpelling(), "anonymous"); - EXPECT_EQ(dil_lexer.LookAhead(1).getKind(), - lldb_private::dil::TokenKind::kw_namespace); - EXPECT_EQ(dil_lexer.LookAhead(2).getKind(), - lldb_private::dil::TokenKind::r_paren); - EXPECT_EQ(dil_lexer.LookAhead(3).getKind(), - lldb_private::dil::TokenKind::coloncolon); - // Verify we've advanced our position counter (lexing location) in the - // input 23 characters (the length of '(anonymous namespace)::'. 
- EXPECT_EQ(dil_lexer.GetLocation(), expect_loc); + EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::l_paren); + EXPECT_EQ(lexer.LookAhead(0).GetKind(), lldb_private::dil::Token::identifier); + EXPECT_EQ(lexer.LookAhead(0).GetSpelling(), "anonymous"); + EXPECT_EQ(lexer.LookAhead(1).GetKind(), + lldb_private::dil::Token::kw_namespace); + EXPECT_EQ(lexer.LookAhead(2).GetKind(), lldb_private::dil::Token::r_paren); + EXPECT_EQ(lexer.LookAhead(3).GetKind(), lldb_private::dil::Token::coloncolon); // Our current index should still be 0, as we only looked ahead; we are still // officially on the '('. - EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), 0); + EXPECT_EQ(lexer.GetCurrentTokenIdx(), (uint32_t)0); // Accept the 'lookahead', so our current token is '::', which has the index // 4 in our vector of tokens (which starts at zero). - dil_token = dil_lexer.AcceptLookAhead(3); - EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::coloncolon); - EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), 4); - - // Lex the final variable name in the input string - dil_lexer.Lex(dil_token); - dil_lexer.IncrementTokenIdx(); - EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); - EXPECT_EQ(dil_token.getSpelling(), "some_var"); - EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), 5); - - dil_lexer.Lex(dil_token); - EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::eof); + token = lexer.AcceptLookAhead(3); + EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::coloncolon); + EXPECT_EQ(lexer.GetCurrentTokenIdx(), (uint32_t)4); + + token = lexer.GetNextToken(); + EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier); + EXPECT_EQ(token.GetSpelling(), "some_var"); + EXPECT_EQ(lexer.GetCurrentTokenIdx(), (uint32_t)5); + // Verify we've advanced our position counter (lexing location) in the + // input 23 characters (the length of '(anonymous namespace)::'. 
+ EXPECT_EQ(token.GetLocation(), expect_loc); + token = lexer.GetNextToken(); + EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof); } TEST(DILLexerTests, MultiTokenLexTest) { - StringRef dil_input_expr("This string has several identifiers"); - lldb_private::dil::DILLexer dil_lexer(dil_input_expr); - lldb_private::dil::DILToken dil_token; - dil_token.setKind(lldb_private::dil::TokenKind::unknown); - - dil_lexer.Lex(dil_token); - EXPECT_EQ(dil_lexer.GetCurrentTokenIdx(), UINT_MAX); - dil_lexer.ResetTokenIdx(0); - - EXPECT_EQ(dil_token.getSpelling(), "This"); - EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); - dil_lexer.Lex(dil_token); - dil_lexer.IncrementTokenIdx(); - - EXPECT_EQ(dil_token.getSpelling(), "string"); - EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); - dil_lexer.Lex(dil_token); - dil_lexer.IncrementTokenIdx(); - - EXPECT_EQ(dil_token.getSpelling(), "has"); - EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); - dil_lexer.Lex(dil_token); - dil_lexer.IncrementTokenIdx(); - - EXPECT_EQ(dil_token.getSpelling(), "several"); - EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); - dil_lexer.Lex(dil_token); - dil_lexer.IncrementTokenIdx(); - - EXPECT_EQ(dil_token.getSpelling(), "identifiers"); - EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); - dil_lexer.Lex(dil_token); - dil_lexer.IncrementTokenIdx(); - - EXPECT_EQ(dil_token.getSpelling(), ""); - EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::eof); + StringRef input_expr("This string has (several ) ::identifiers"); + lldb_private::dil::DILLexer lexer(input_expr); + lldb_private::dil::Token token; + token.SetKind(lldb_private::dil::Token::unknown); + + auto success = lexer.LexAll(); + if (!success) { + EXPECT_TRUE(false); + } + + std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>> + expected_tokens = { + {lldb_private::dil::Token::identifier, "This"}, + 
{lldb_private::dil::Token::identifier, "string"}, + {lldb_private::dil::Token::identifier, "has"}, + {lldb_private::dil::Token::l_paren, "("}, + {lldb_private::dil::Token::identifier, "several"}, + {lldb_private::dil::Token::r_paren, ")"}, + {lldb_private::dil::Token::coloncolon, "::"}, + {lldb_private::dil::Token::identifier, "identifiers"}, + }; + + EXPECT_TRUE(VerifyExpectedTokens(lexer, expected_tokens, 0)); + + token = lexer.GetNextToken(); + EXPECT_EQ(token.GetSpelling(), ""); + EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof); } TEST(DILLexerTests, IdentifiersTest) { @@ -166,28 +180,39 @@ TEST(DILLexerTests, IdentifiersTest) { }; // Verify that all of the valid identifiers come out as identifier tokens. - for (auto str : valid_identifiers) { - StringRef dil_input_expr(str); - lldb_private::dil::DILLexer dil_lexer(dil_input_expr); - lldb_private::dil::DILToken dil_token; - dil_token.setKind(lldb_private::dil::TokenKind::unknown); - - dil_lexer.Lex(dil_token); - EXPECT_EQ(dil_token.getKind(), lldb_private::dil::TokenKind::identifier); + for (auto &str : valid_identifiers) { + SCOPED_TRACE(str); + lldb_private::dil::DILLexer lexer(str); + lldb_private::dil::Token token; + token.SetKind(lldb_private::dil::Token::unknown); + + auto success = lexer.LexAll(); + if (!success) { + EXPECT_TRUE(false); + } + token = lexer.GetNextToken(); + EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier); } // Verify that none of the invalid identifiers come out as identifier tokens. 
- for (auto str : invalid_identifiers) { - StringRef dil_input_expr(str); - lldb_private::dil::DILLexer dil_lexer(dil_input_expr); - lldb_private::dil::DILToken dil_token; - dil_token.setKind(lldb_private::dil::TokenKind::unknown); - - dil_lexer.Lex(dil_token); - EXPECT_TRUE(dil_token.isNot(lldb_private::dil::TokenKind::identifier)); - EXPECT_TRUE(dil_token.isOneOf(lldb_private::dil::TokenKind::unknown, - lldb_private::dil::TokenKind::none, - lldb_private::dil::TokenKind::eof, - lldb_private::dil::TokenKind::kw_namespace)); + for (auto &str : invalid_identifiers) { + SCOPED_TRACE(str); + lldb_private::dil::DILLexer lexer(str); + lldb_private::dil::Token token; + token.SetKind(lldb_private::dil::Token::unknown); + + auto success = lexer.LexAll(); + // In this case, it's ok for Lex() to return an error. + if (!success) { + llvm::consumeError(success.takeError()); + } else { + // We didn't get an error; make sure we did not get an identifier token. + token = lexer.GetNextToken(); + EXPECT_TRUE(token.IsNot(lldb_private::dil::Token::identifier)); + EXPECT_TRUE(token.IsOneOf(lldb_private::dil::Token::unknown, + lldb_private::dil::Token::none, + lldb_private::dil::Token::eof, + lldb_private::dil::Token::kw_namespace)); + } } } >From ccf5203595ec22d2e58d774ecbe58cdccfc2f106 Mon Sep 17 00:00:00 2001 From: Caroline Tice <cmt...@google.com> Date: Thu, 30 Jan 2025 15:07:33 -0800 Subject: [PATCH 4/5] Address latest review comments: - Remove 'namespace' as a keyword (make it a normal identifier) - Remove 'invalid' and 'none' token types. - Remove unnecessary SetKind and GetLength methods from Tokens. - Re-arrange Lexer: - Give it a static Create method, which pre-lexes all the tokens - Make Lex method static - Pull IsWord method out of Lexer class - Make the Lexer constructor private. - Remove LexAll, GetLocation, UpdateLexedTokens, AcceptLookAhead, GetNextToken, and IncrementTokenIdx methods from Lexer class. 
- Add new 'Advance' method (to help replace some of the removed methods). - Update indexing in LookAhead (LookAhead(0) now means the 'current' token). - Remove m_cur_pos data member from Lexer class. - Replace m_invalid_token with m_eof_token. - Use 'remainder' StringRef to help with lexing. - Update the unit tests to handle all the code changes in the Lexer. - Update the unit tests to use ASSERT_THAT_EXPECTED to check llvm::Expected return values. - Update the unit tests to use "testing::ElementsAre(testing::Pair ..." to verify all the lexed tokens; also added helper function ExtractTokenData, and deleted function VerifyExpectedTokens. --- lldb/include/lldb/ValueObject/DILLexer.h | 97 +++----- lldb/source/ValueObject/DILLexer.cpp | 153 ++++-------- lldb/unittests/ValueObject/CMakeLists.txt | 1 + lldb/unittests/ValueObject/DILLexerTests.cpp | 232 ++++++++----------- 4 files changed, 187 insertions(+), 296 deletions(-) diff --git a/lldb/include/lldb/ValueObject/DILLexer.h b/lldb/include/lldb/ValueObject/DILLexer.h index 61e5fe622e51e6..9e6cec18a68672 100644 --- a/lldb/include/lldb/ValueObject/DILLexer.h +++ b/lldb/include/lldb/ValueObject/DILLexer.h @@ -18,9 +18,7 @@ #include <string> #include <vector> -namespace lldb_private { - -namespace dil { +namespace lldb_private::dil { /// Class defining the tokens generated by the DIL lexer and used by the /// DIL parser. 
@@ -30,10 +28,7 @@ class Token { coloncolon, eof, identifier, - invalid, - kw_namespace, l_paren, - none, r_paren, unknown, }; @@ -41,16 +36,10 @@ class Token { Token(Kind kind, std::string spelling, uint32_t start) : m_kind(kind), m_spelling(spelling), m_start_pos(start) {} - Token() : m_kind(Kind::none), m_spelling(""), m_start_pos(0) {} - - void SetKind(Kind kind) { m_kind = kind; } - Kind GetKind() const { return m_kind; } std::string GetSpelling() const { return m_spelling; } - uint32_t GetLength() const { return m_spelling.size(); } - bool Is(Kind kind) const { return m_kind == kind; } bool IsNot(Kind kind) const { return m_kind != kind; } @@ -74,72 +63,58 @@ class Token { /// Class for doing the simple lexing required by DIL. class DILLexer { public: - DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr) { - m_cur_pos = m_expr.begin(); - // Use UINT_MAX to indicate invalid/uninitialized value. - m_tokens_idx = UINT_MAX; - m_invalid_token = Token(Token::invalid, "", 0); - } - - llvm::Expected<bool> LexAll(); - - /// Return the lexed token N+1 positions ahead of the 'current' token - /// being handled by the DIL parser. - const Token &LookAhead(uint32_t N); - - const Token &AcceptLookAhead(uint32_t N); - - const Token &GetNextToken(); - - /// Return the index for the 'current' token being handled by the DIL parser. - uint32_t GetCurrentTokenIdx() { return m_tokens_idx; } + /// Lexes all the tokens in expr and calls the private constructor + /// with the lexed tokens. + static llvm::Expected<DILLexer> Create(llvm::StringRef expr); /// Return the current token to be handled by the DIL parser. const Token &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; } - uint32_t NumLexedTokens() { return m_lexed_tokens.size(); } + /// Advance the current token position by N. + void Advance(uint32_t N = 1) { + // UINT_MAX means uninitialized, no "current" position, so move to start. 
+ if (m_tokens_idx == UINT_MAX) + m_tokens_idx = 0; + else if (m_tokens_idx + N >= m_lexed_tokens.size()) + // N is too large; advance to the end of the lexed tokens. + m_tokens_idx = m_lexed_tokens.size() - 1; + else + m_tokens_idx += N; + } - /// Update the index for the 'current' token, to point to the next lexed - /// token. - bool IncrementTokenIdx() { - if (m_tokens_idx >= m_lexed_tokens.size() - 1) - return false; + /// Return the lexed token N positions ahead of the 'current' token + /// being handled by the DIL parser. + const Token &LookAhead(uint32_t N) { + if (m_tokens_idx + N < m_lexed_tokens.size()) + return m_lexed_tokens[m_tokens_idx + N]; - m_tokens_idx++; - return true; + return m_eof_token; } + /// Return the index for the 'current' token being handled by the DIL parser. + uint32_t GetCurrentTokenIdx() { return m_tokens_idx; } + /// Set the index for the 'current' token (to be handled by the parser) /// to a particular position. Used for either committing 'look ahead' parsing /// or rolling back tentative parsing. - bool ResetTokenIdx(uint32_t new_value) { - if (new_value > m_lexed_tokens.size() - 1) - return false; - + void ResetTokenIdx(uint32_t new_value) { + assert(new_value == UINT_MAX || new_value < m_lexed_tokens.size()); m_tokens_idx = new_value; - return true; } - uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); } + uint32_t NumLexedTokens() { return m_lexed_tokens.size(); } private: - llvm::Expected<Token> Lex(); + DILLexer(llvm::StringRef dil_expr, std::vector<Token> lexed_tokens) + : m_expr(dil_expr), m_lexed_tokens(lexed_tokens), m_tokens_idx(UINT_MAX), + m_eof_token(Token(Token::eof, "", 0)) {} - llvm::iterator_range<llvm::StringRef::iterator> IsWord(); - - /// Update 'result' with the other paremeter values, create a - /// duplicate token, and push the duplicate token onto the vector of - /// lexed tokens. 
- void UpdateLexedTokens(Token &result, Token::Kind tok_kind, - std::string tok_str, uint32_t tok_pos); + static llvm::Expected<Token> Lex(llvm::StringRef expr, + llvm::StringRef &remainder); // The input string we are lexing & parsing. llvm::StringRef m_expr; - // The current position of the lexer within m_expr (the character position, - // within the string, of the next item to be lexed). - llvm::StringRef::iterator m_cur_pos; - // Holds all of the tokens lexed so far. std::vector<Token> m_lexed_tokens; @@ -147,12 +122,10 @@ class DILLexer { // currently trying to parse/handle. uint32_t m_tokens_idx; - // "invalid" token; to be returned by lexer when 'look ahead' fails. - Token m_invalid_token; + // "eof" token; to be returned by lexer when 'look ahead' fails. + Token m_eof_token; }; -} // namespace dil - -} // namespace lldb_private +} // namespace lldb_private::dil #endif // LLDB_VALUEOBJECT_DILLEXER_H_ diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp index 30e4bcb04e6505..b92bb86c8219c6 100644 --- a/lldb/source/ValueObject/DILLexer.cpp +++ b/lldb/source/ValueObject/DILLexer.cpp @@ -15,9 +15,7 @@ #include "lldb/Utility/Status.h" #include "llvm/ADT/StringSwitch.h" -namespace lldb_private { - -namespace dil { +namespace lldb_private::dil { llvm::StringRef Token::GetTokenName(Kind kind) { switch (kind) { @@ -27,14 +25,8 @@ llvm::StringRef Token::GetTokenName(Kind kind) { return "eof"; case Kind::identifier: return "identifier"; - case Kind::invalid: - return "invalid"; - case Kind::kw_namespace: - return "namespace"; case Kind::l_paren: return "l_paren"; - case Kind::none: - return "none"; case Kind::r_paren: return "r_paren"; case Kind::unknown: @@ -50,140 +42,91 @@ static bool IsDigit(char c) { return '0' <= c && c <= '9'; } // A word starts with a letter, underscore, or dollar sign, followed by // letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores. 
-llvm::iterator_range<llvm::StringRef::iterator> DILLexer::IsWord() { - llvm::StringRef::iterator start = m_cur_pos; +static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr, + llvm::StringRef &remainder) { + llvm::StringRef::iterator cur_pos = expr.end() - remainder.size(); + llvm::StringRef::iterator start = cur_pos; bool dollar_start = false; // Must not start with a digit. - if (m_cur_pos == m_expr.end() || IsDigit(*m_cur_pos)) - return llvm::make_range(m_cur_pos, m_cur_pos); + if (cur_pos == expr.end() || IsDigit(*cur_pos)) + return std::nullopt; // First character *may* be a '$', for a register name or convenience // variable. - if (*m_cur_pos == '$') { + if (*cur_pos == '$') { dollar_start = true; - ++m_cur_pos; + ++cur_pos; } // Contains only letters, digits or underscores - for (; m_cur_pos != m_expr.end(); ++m_cur_pos) { - char c = *m_cur_pos; + for (; cur_pos != expr.end(); ++cur_pos) { + char c = *cur_pos; if (!IsLetter(c) && !IsDigit(c) && c != '_') break; } // If first char is '$', make sure there's at least one mare char, or it's // invalid. 
- if (dollar_start && (m_cur_pos - start <= 1)) { - m_cur_pos = start; - return llvm::make_range(start, start); // Empty range + if (dollar_start && (cur_pos - start <= 1)) { + cur_pos = start; + return std::nullopt; } - return llvm::make_range(start, m_cur_pos); -} + if (cur_pos == start) + return std::nullopt; + + llvm::StringRef word = expr.substr(start - expr.begin(), cur_pos - start); + if (remainder.consume_front(word)) + return word; -void DILLexer::UpdateLexedTokens(Token &result, Token::Kind tok_kind, - std::string tok_str, uint32_t tok_pos) { - Token new_token(tok_kind, tok_str, tok_pos); - result = new_token; - m_lexed_tokens.push_back(std::move(new_token)); + return std::nullopt; } -llvm::Expected<bool> DILLexer::LexAll() { - bool done = false; - while (!done) { - auto tok_or_err = Lex(); - if (!tok_or_err) - return tok_or_err.takeError(); - Token token = *tok_or_err; - if (token.GetKind() == Token::eof) { - done = true; +llvm::Expected<DILLexer> DILLexer::Create(llvm::StringRef expr) { + std::vector<Token> tokens; + llvm::StringRef remainder = expr; + do { + if (llvm::Expected<Token> t = Lex(expr, remainder)) { + tokens.push_back(std::move(*t)); + } else { + return t.takeError(); } - } - return true; + } while (tokens.back().GetKind() != Token::eof); + return DILLexer(expr, std::move(tokens)); } -llvm::Expected<Token> DILLexer::Lex() { - Token result; - +llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr, + llvm::StringRef &remainder) { // Skip over whitespace (spaces). - while (m_cur_pos != m_expr.end() && *m_cur_pos == ' ') - m_cur_pos++; + remainder = remainder.ltrim(); + llvm::StringRef::iterator cur_pos = expr.end() - remainder.size(); // Check to see if we've reached the end of our input string. 
- if (m_cur_pos == m_expr.end()) { - UpdateLexedTokens(result, Token::eof, "", (uint32_t)m_expr.size()); - return result; + if (remainder.empty() || cur_pos == expr.end()) + return Token(Token::eof, "", (uint32_t)expr.size()); + + uint32_t position = cur_pos - expr.begin(); + std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder); + if (maybe_word) { + llvm::StringRef word = *maybe_word; + return Token(Token::identifier, word.str(), position); } - uint32_t position = m_cur_pos - m_expr.begin(); - llvm::StringRef::iterator start = m_cur_pos; - llvm::iterator_range<llvm::StringRef::iterator> word_range = IsWord(); - if (!word_range.empty()) { - uint32_t length = word_range.end() - word_range.begin(); - llvm::StringRef word(m_expr.substr(position, length)); - // We will be adding more keywords here in the future... - Token::Kind kind = llvm::StringSwitch<Token::Kind>(word) - .Case("namespace", Token::kw_namespace) - .Default(Token::identifier); - UpdateLexedTokens(result, kind, word.str(), position); - return result; - } - - m_cur_pos = start; - llvm::StringRef remainder(m_expr.substr(position, m_expr.end() - m_cur_pos)); - std::vector<std::pair<Token::Kind, const char *>> operators = { + constexpr std::pair<Token::Kind, const char *> operators[] = { {Token::l_paren, "("}, {Token::r_paren, ")"}, {Token::coloncolon, "::"}, }; for (auto [kind, str] : operators) { if (remainder.consume_front(str)) { - m_cur_pos += strlen(str); - UpdateLexedTokens(result, kind, str, position); - return result; + cur_pos += strlen(str); + return Token(kind, str, position); } } // Unrecognized character(s) in string; unable to lex it. 
- Status error("Unable to lex input string"); - return error.ToError(); -} - -const Token &DILLexer::LookAhead(uint32_t N) { - if (m_tokens_idx + N + 1 < m_lexed_tokens.size()) - return m_lexed_tokens[m_tokens_idx + N + 1]; - - return m_invalid_token; + return llvm::createStringError("Unable to lex input string"); } -const Token &DILLexer::AcceptLookAhead(uint32_t N) { - if (m_tokens_idx + N + 1 > m_lexed_tokens.size()) - return m_invalid_token; - - m_tokens_idx += N + 1; - return m_lexed_tokens[m_tokens_idx]; -} - -const Token &DILLexer::GetNextToken() { - if (m_tokens_idx == UINT_MAX) - m_tokens_idx = 0; - else - m_tokens_idx++; - - // Return the next token in the vector of lexed tokens. - if (m_tokens_idx < m_lexed_tokens.size()) - return m_lexed_tokens[m_tokens_idx]; - - // We're already at/beyond the end of our lexed tokens. If the last token - // is an eof token, return it. - if (m_lexed_tokens[m_lexed_tokens.size() - 1].GetKind() == Token::eof) - return m_lexed_tokens[m_lexed_tokens.size() - 1]; - - // Return the invalid token. 
- return m_invalid_token; -} - -} // namespace dil - -} // namespace lldb_private +} // namespace lldb_private::dil diff --git a/lldb/unittests/ValueObject/CMakeLists.txt b/lldb/unittests/ValueObject/CMakeLists.txt index 952f5411a98057..14808aa2f213a5 100644 --- a/lldb/unittests/ValueObject/CMakeLists.txt +++ b/lldb/unittests/ValueObject/CMakeLists.txt @@ -6,6 +6,7 @@ add_lldb_unittest(LLDBValueObjectTests lldbValueObject lldbPluginPlatformLinux lldbPluginScriptInterpreterNone + LLVMTestingSupport LINK_COMPONENTS Support diff --git a/lldb/unittests/ValueObject/DILLexerTests.cpp b/lldb/unittests/ValueObject/DILLexerTests.cpp index 137013e40d6adf..f5523d3c5c4068 100644 --- a/lldb/unittests/ValueObject/DILLexerTests.cpp +++ b/lldb/unittests/ValueObject/DILLexerTests.cpp @@ -8,100 +8,90 @@ #include "lldb/ValueObject/DILLexer.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Testing/Support/Error.h" #include "gtest/gtest.h" #include <string> using llvm::StringRef; -bool VerifyExpectedTokens( - lldb_private::dil::DILLexer &lexer, - std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>> - exp_tokens, - uint32_t start_pos) { - if (lexer.NumLexedTokens() - start_pos < exp_tokens.size()) - return false; - - if (start_pos > 0) - lexer.ResetTokenIdx(start_pos - - 1); // GetNextToken increments the idx first. 
- for (const auto &pair : exp_tokens) { - lldb_private::dil::Token token = lexer.GetNextToken(); - if (token.GetKind() != pair.first || token.GetSpelling() != pair.second) - return false; - } - - return true; +std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>> +ExtractTokenData(lldb_private::dil::DILLexer &lexer) { + std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>> data; + if (lexer.NumLexedTokens() == 0) + return data; + + lexer.ResetTokenIdx(UINT_MAX); + do { + lexer.Advance(); + lldb_private::dil::Token tok = lexer.GetCurrentToken(); + data.push_back(std::make_pair(tok.GetKind(), tok.GetSpelling())); + } while (data.back().first != lldb_private::dil::Token::eof); + return data; } TEST(DILLexerTests, SimpleTest) { StringRef input_expr("simple_var"); uint32_t tok_len = 10; - lldb_private::dil::DILLexer lexer(input_expr); - lldb_private::dil::Token token; - token.SetKind(lldb_private::dil::Token::unknown); + llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer = + lldb_private::dil::DILLexer::Create(input_expr); + ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded()); + lldb_private::dil::DILLexer lexer(*maybe_lexer); + lldb_private::dil::Token token = + lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0); EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::unknown); - auto success = lexer.LexAll(); - if (!success) { - EXPECT_TRUE(false); - } - token = lexer.GetNextToken(); + lexer.Advance(); + token = lexer.GetCurrentToken(); EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier); EXPECT_EQ(token.GetSpelling(), "simple_var"); - EXPECT_EQ(token.GetLength(), tok_len); - token = lexer.GetNextToken(); - ; + EXPECT_EQ(token.GetSpelling().size(), tok_len); + lexer.Advance(); + token = lexer.GetCurrentToken(); EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof); } TEST(DILLexerTests, TokenKindTest) { StringRef input_expr("namespace"); - lldb_private::dil::DILLexer lexer(input_expr); - 
lldb_private::dil::Token token; - token.SetKind(lldb_private::dil::Token::unknown); - - auto success = lexer.LexAll(); - if (!success) { - EXPECT_TRUE(false); - } - token = lexer.GetNextToken(); - - EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::kw_namespace); - EXPECT_TRUE(token.IsNot(lldb_private::dil::Token::identifier)); + llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer = + lldb_private::dil::DILLexer::Create(input_expr); + ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded()); + lldb_private::dil::DILLexer lexer(*maybe_lexer); + lldb_private::dil::Token token = + lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0); + lexer.Advance(); + token = lexer.GetCurrentToken(); + + EXPECT_TRUE(token.Is(lldb_private::dil::Token::identifier)); EXPECT_FALSE(token.Is(lldb_private::dil::Token::l_paren)); EXPECT_TRUE(token.IsOneOf(lldb_private::dil::Token::eof, - lldb_private::dil::Token::kw_namespace)); + lldb_private::dil::Token::identifier)); EXPECT_FALSE(token.IsOneOf( lldb_private::dil::Token::l_paren, lldb_private::dil::Token::r_paren, lldb_private::dil::Token::coloncolon, lldb_private::dil::Token::eof)); - - token.SetKind(lldb_private::dil::Token::identifier); - EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier); } TEST(DILLexerTests, LookAheadTest) { StringRef input_expr("(anonymous namespace)::some_var"); - lldb_private::dil::DILLexer lexer(input_expr); - lldb_private::dil::Token token; - token.SetKind(lldb_private::dil::Token::unknown); + llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer = + lldb_private::dil::DILLexer::Create(input_expr); + ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded()); + lldb_private::dil::DILLexer lexer(*maybe_lexer); + lldb_private::dil::Token token = + lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0); uint32_t expect_loc = 23; - - auto success = lexer.LexAll(); - if (!success) { - EXPECT_TRUE(false); - } - token = lexer.GetNextToken(); + lexer.Advance(); + token = 
lexer.GetCurrentToken(); // Current token is '('; check the next 4 tokens, to make - // sure they are the identifier 'anonymous', the namespace keyword, + // sure they are the identifier 'anonymous', the identifier 'namespace' // ')' and '::', in that order. EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::l_paren); - EXPECT_EQ(lexer.LookAhead(0).GetKind(), lldb_private::dil::Token::identifier); - EXPECT_EQ(lexer.LookAhead(0).GetSpelling(), "anonymous"); - EXPECT_EQ(lexer.LookAhead(1).GetKind(), - lldb_private::dil::Token::kw_namespace); - EXPECT_EQ(lexer.LookAhead(2).GetKind(), lldb_private::dil::Token::r_paren); - EXPECT_EQ(lexer.LookAhead(3).GetKind(), lldb_private::dil::Token::coloncolon); + EXPECT_EQ(lexer.LookAhead(1).GetKind(), lldb_private::dil::Token::identifier); + EXPECT_EQ(lexer.LookAhead(1).GetSpelling(), "anonymous"); + EXPECT_EQ(lexer.LookAhead(2).GetKind(), lldb_private::dil::Token::identifier); + EXPECT_EQ(lexer.LookAhead(2).GetSpelling(), "namespace"); + EXPECT_EQ(lexer.LookAhead(3).GetKind(), lldb_private::dil::Token::r_paren); + EXPECT_EQ(lexer.LookAhead(4).GetKind(), lldb_private::dil::Token::coloncolon); // Our current index should still be 0, as we only looked ahead; we are still // officially on the '('. @@ -109,110 +99,94 @@ TEST(DILLexerTests, LookAheadTest) { // Accept the 'lookahead', so our current token is '::', which has the index // 4 in our vector of tokens (which starts at zero). 
- token = lexer.AcceptLookAhead(3); + lexer.Advance(4); + token = lexer.GetCurrentToken(); EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::coloncolon); EXPECT_EQ(lexer.GetCurrentTokenIdx(), (uint32_t)4); - token = lexer.GetNextToken(); + lexer.Advance(); + token = lexer.GetCurrentToken(); EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier); EXPECT_EQ(token.GetSpelling(), "some_var"); EXPECT_EQ(lexer.GetCurrentTokenIdx(), (uint32_t)5); // Verify we've advanced our position counter (lexing location) in the // input 23 characters (the length of '(anonymous namespace)::'. EXPECT_EQ(token.GetLocation(), expect_loc); - token = lexer.GetNextToken(); + + lexer.Advance(); + token = lexer.GetCurrentToken(); EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof); } TEST(DILLexerTests, MultiTokenLexTest) { StringRef input_expr("This string has (several ) ::identifiers"); - lldb_private::dil::DILLexer lexer(input_expr); - lldb_private::dil::Token token; - token.SetKind(lldb_private::dil::Token::unknown); - - auto success = lexer.LexAll(); - if (!success) { - EXPECT_TRUE(false); - } + llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer = + lldb_private::dil::DILLexer::Create(input_expr); + ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded()); + lldb_private::dil::DILLexer lexer(*maybe_lexer); + lldb_private::dil::Token token = + lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0); std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>> - expected_tokens = { - {lldb_private::dil::Token::identifier, "This"}, - {lldb_private::dil::Token::identifier, "string"}, - {lldb_private::dil::Token::identifier, "has"}, - {lldb_private::dil::Token::l_paren, "("}, - {lldb_private::dil::Token::identifier, "several"}, - {lldb_private::dil::Token::r_paren, ")"}, - {lldb_private::dil::Token::coloncolon, "::"}, - {lldb_private::dil::Token::identifier, "identifiers"}, - }; - - EXPECT_TRUE(VerifyExpectedTokens(lexer, expected_tokens, 0)); - - token = 
lexer.GetNextToken(); + lexer_tokens_data = ExtractTokenData(lexer); + + EXPECT_THAT( + lexer_tokens_data, + testing::ElementsAre( + testing::Pair(lldb_private::dil::Token::identifier, "This"), + testing::Pair(lldb_private::dil::Token::identifier, "string"), + testing::Pair(lldb_private::dil::Token::identifier, "has"), + testing::Pair(lldb_private::dil::Token::l_paren, "("), + testing::Pair(lldb_private::dil::Token::identifier, "several"), + testing::Pair(lldb_private::dil::Token::r_paren, ")"), + testing::Pair(lldb_private::dil::Token::coloncolon, "::"), + testing::Pair(lldb_private::dil::Token::identifier, "identifiers"), + testing::Pair(lldb_private::dil::Token::eof, ""))); + lexer.Advance(); + token = lexer.GetCurrentToken(); EXPECT_EQ(token.GetSpelling(), ""); EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof); } TEST(DILLexerTests, IdentifiersTest) { std::vector<std::string> valid_identifiers = { - "$My_name1", - "$pc", - "abcd", - "ab cd", - "_", - "_a", - "_a_", - "a_b", - "this", - "self", - "a", - "MyName" - }; - std::vector<std::string> invalid_identifiers = { - "234", - "2a", - "2", - "$", - "1MyName", - "", - "namespace" - }; + "$My_name1", "$pc", "abcd", "ab cd", "_", "_a", "_a_", + "a_b", "this", "self", "a", "MyName", "namespace"}; + std::vector<std::string> invalid_identifiers = {"234", "2a", "2", + "$", "1MyName", ""}; // Verify that all of the valid identifiers come out as identifier tokens. 
for (auto &str : valid_identifiers) { SCOPED_TRACE(str); - lldb_private::dil::DILLexer lexer(str); - lldb_private::dil::Token token; - token.SetKind(lldb_private::dil::Token::unknown); - - auto success = lexer.LexAll(); - if (!success) { - EXPECT_TRUE(false); - } - token = lexer.GetNextToken(); + llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer = + lldb_private::dil::DILLexer::Create(str); + ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded()); + lldb_private::dil::DILLexer lexer(*maybe_lexer); + lldb_private::dil::Token token = + lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0); + lexer.Advance(); + token = lexer.GetCurrentToken(); EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier); } // Verify that none of the invalid identifiers come out as identifier tokens. for (auto &str : invalid_identifiers) { SCOPED_TRACE(str); - lldb_private::dil::DILLexer lexer(str); - lldb_private::dil::Token token; - token.SetKind(lldb_private::dil::Token::unknown); - - auto success = lexer.LexAll(); - // In this case, it's ok for Lex() to return an error. - if (!success) { - llvm::consumeError(success.takeError()); + llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer = + lldb_private::dil::DILLexer::Create(str); + if (!maybe_lexer) { + llvm::consumeError(maybe_lexer.takeError()); + // In this case, it's ok for lexing to return an error. } else { + lldb_private::dil::DILLexer lexer(*maybe_lexer); + lldb_private::dil::Token token = + lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0); // We didn't get an error; make sure we did not get an identifier token. 
- token = lexer.GetNextToken(); + lexer.Advance(); + token = lexer.GetCurrentToken(); EXPECT_TRUE(token.IsNot(lldb_private::dil::Token::identifier)); EXPECT_TRUE(token.IsOneOf(lldb_private::dil::Token::unknown, - lldb_private::dil::Token::none, - lldb_private::dil::Token::eof, - lldb_private::dil::Token::kw_namespace)); + lldb_private::dil::Token::eof)); } } } >From 29e9f265ea342e84372c63adbfdac0882d2fd434 Mon Sep 17 00:00:00 2001 From: Caroline Tice <cmt...@google.com> Date: Sun, 2 Feb 2025 22:06:04 -0800 Subject: [PATCH 5/5] Address more review comments: - Use std::move on std::string & std::vector in constructor initializers. - Remove some unnecessary code. - Update ExtractTokenData (helper function in unit tests) to set up the lexer and do the lexing inside the function; return an llvm::Expected value. - Add 'using namespace lldb_private::dil;' to unit tests; clean up tests accordingly. - Minor code cleanups in the unit tests. --- lldb/include/lldb/ValueObject/DILLexer.h | 9 +- lldb/source/ValueObject/DILLexer.cpp | 10 +- lldb/unittests/ValueObject/DILLexerTests.cpp | 162 ++++++++----------- 3 files changed, 72 insertions(+), 109 deletions(-) diff --git a/lldb/include/lldb/ValueObject/DILLexer.h b/lldb/include/lldb/ValueObject/DILLexer.h index 9e6cec18a68672..3935bf7e8e5c5e 100644 --- a/lldb/include/lldb/ValueObject/DILLexer.h +++ b/lldb/include/lldb/ValueObject/DILLexer.h @@ -34,7 +34,7 @@ class Token { }; Token(Kind kind, std::string spelling, uint32_t start) - : m_kind(kind), m_spelling(spelling), m_start_pos(start) {} + : m_kind(kind), m_spelling(std::move(spelling)), m_start_pos(start) {} Kind GetKind() const { return m_kind; } @@ -88,7 +88,8 @@ class DILLexer { if (m_tokens_idx + N < m_lexed_tokens.size()) return m_lexed_tokens[m_tokens_idx + N]; - return m_eof_token; + // Last token should be an 'eof' token. + return m_lexed_tokens.back(); } /// Return the index for the 'current' token being handled by the DIL parser. 
@@ -106,8 +107,8 @@ class DILLexer { private: DILLexer(llvm::StringRef dil_expr, std::vector<Token> lexed_tokens) - : m_expr(dil_expr), m_lexed_tokens(lexed_tokens), m_tokens_idx(UINT_MAX), - m_eof_token(Token(Token::eof, "", 0)) {} + : m_expr(dil_expr), m_lexed_tokens(std::move(lexed_tokens)), + m_tokens_idx(UINT_MAX), m_eof_token(Token(Token::eof, "", 0)) {} static llvm::Expected<Token> Lex(llvm::StringRef expr, llvm::StringRef &remainder); diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp index b92bb86c8219c6..46ecea9b585f56 100644 --- a/lldb/source/ValueObject/DILLexer.cpp +++ b/lldb/source/ValueObject/DILLexer.cpp @@ -108,10 +108,8 @@ llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr, uint32_t position = cur_pos - expr.begin(); std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder); - if (maybe_word) { - llvm::StringRef word = *maybe_word; - return Token(Token::identifier, word.str(), position); - } + if (maybe_word) + return Token(Token::identifier, maybe_word->str(), position); constexpr std::pair<Token::Kind, const char *> operators[] = { {Token::l_paren, "("}, @@ -119,10 +117,8 @@ llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr, {Token::coloncolon, "::"}, }; for (auto [kind, str] : operators) { - if (remainder.consume_front(str)) { - cur_pos += strlen(str); + if (remainder.consume_front(str)) return Token(kind, str, position); - } } // Unrecognized character(s) in string; unable to lex it. 
diff --git a/lldb/unittests/ValueObject/DILLexerTests.cpp b/lldb/unittests/ValueObject/DILLexerTests.cpp index f5523d3c5c4068..b6858246b8850c 100644 --- a/lldb/unittests/ValueObject/DILLexerTests.cpp +++ b/lldb/unittests/ValueObject/DILLexerTests.cpp @@ -14,179 +14,145 @@ using llvm::StringRef; -std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>> -ExtractTokenData(lldb_private::dil::DILLexer &lexer) { - std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>> data; +using namespace lldb_private::dil; + +llvm::Expected<std::vector<std::pair<Token::Kind, std::string>>> +ExtractTokenData(llvm::StringRef input_expr) { + + llvm::Expected<DILLexer> maybe_lexer = DILLexer::Create(input_expr); + if (!maybe_lexer) + return maybe_lexer.takeError(); + DILLexer lexer(*maybe_lexer); + if (lexer.NumLexedTokens() == 0) - return data; + return llvm::createStringError("No lexed tokens"); lexer.ResetTokenIdx(UINT_MAX); + std::vector<std::pair<Token::Kind, std::string>> data; do { lexer.Advance(); - lldb_private::dil::Token tok = lexer.GetCurrentToken(); + Token tok = lexer.GetCurrentToken(); data.push_back(std::make_pair(tok.GetKind(), tok.GetSpelling())); - } while (data.back().first != lldb_private::dil::Token::eof); + } while (data.back().first != Token::eof); return data; } TEST(DILLexerTests, SimpleTest) { StringRef input_expr("simple_var"); - uint32_t tok_len = 10; - llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer = - lldb_private::dil::DILLexer::Create(input_expr); + llvm::Expected<DILLexer> maybe_lexer = DILLexer::Create(input_expr); ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded()); - lldb_private::dil::DILLexer lexer(*maybe_lexer); - lldb_private::dil::Token token = - lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0); - EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::unknown); + DILLexer lexer(*maybe_lexer); + Token token = Token(Token::unknown, "", 0); + EXPECT_EQ(token.GetKind(), Token::unknown); 
lexer.Advance(); token = lexer.GetCurrentToken(); - EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier); + EXPECT_EQ(token.GetKind(), Token::identifier); EXPECT_EQ(token.GetSpelling(), "simple_var"); - EXPECT_EQ(token.GetSpelling().size(), tok_len); lexer.Advance(); token = lexer.GetCurrentToken(); - EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof); + EXPECT_EQ(token.GetKind(), Token::eof); } TEST(DILLexerTests, TokenKindTest) { - StringRef input_expr("namespace"); - llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer = - lldb_private::dil::DILLexer::Create(input_expr); - ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded()); - lldb_private::dil::DILLexer lexer(*maybe_lexer); - lldb_private::dil::Token token = - lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0); - lexer.Advance(); - token = lexer.GetCurrentToken(); + Token token = Token(Token::identifier, "ident", 0); - EXPECT_TRUE(token.Is(lldb_private::dil::Token::identifier)); - EXPECT_FALSE(token.Is(lldb_private::dil::Token::l_paren)); - EXPECT_TRUE(token.IsOneOf(lldb_private::dil::Token::eof, - lldb_private::dil::Token::identifier)); - EXPECT_FALSE(token.IsOneOf( - lldb_private::dil::Token::l_paren, lldb_private::dil::Token::r_paren, - lldb_private::dil::Token::coloncolon, lldb_private::dil::Token::eof)); + EXPECT_TRUE(token.Is(Token::identifier)); + EXPECT_FALSE(token.Is(Token::l_paren)); + EXPECT_TRUE(token.IsOneOf(Token::eof, Token::identifier)); + EXPECT_FALSE(token.IsOneOf(Token::l_paren, Token::r_paren, Token::coloncolon, + Token::eof)); } TEST(DILLexerTests, LookAheadTest) { StringRef input_expr("(anonymous namespace)::some_var"); - llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer = - lldb_private::dil::DILLexer::Create(input_expr); + llvm::Expected<DILLexer> maybe_lexer = DILLexer::Create(input_expr); ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded()); - lldb_private::dil::DILLexer lexer(*maybe_lexer); - lldb_private::dil::Token token = - 
lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0); - uint32_t expect_loc = 23; + DILLexer lexer(*maybe_lexer); + Token token = Token(Token::unknown, "", 0); lexer.Advance(); token = lexer.GetCurrentToken(); // Current token is '('; check the next 4 tokens, to make // sure they are the identifier 'anonymous', the identifier 'namespace' // ')' and '::', in that order. - EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::l_paren); - EXPECT_EQ(lexer.LookAhead(1).GetKind(), lldb_private::dil::Token::identifier); + EXPECT_EQ(token.GetKind(), Token::l_paren); + EXPECT_EQ(lexer.LookAhead(1).GetKind(), Token::identifier); EXPECT_EQ(lexer.LookAhead(1).GetSpelling(), "anonymous"); - EXPECT_EQ(lexer.LookAhead(2).GetKind(), lldb_private::dil::Token::identifier); + EXPECT_EQ(lexer.LookAhead(2).GetKind(), Token::identifier); EXPECT_EQ(lexer.LookAhead(2).GetSpelling(), "namespace"); - EXPECT_EQ(lexer.LookAhead(3).GetKind(), lldb_private::dil::Token::r_paren); - EXPECT_EQ(lexer.LookAhead(4).GetKind(), lldb_private::dil::Token::coloncolon); + EXPECT_EQ(lexer.LookAhead(3).GetKind(), Token::r_paren); + EXPECT_EQ(lexer.LookAhead(4).GetKind(), Token::coloncolon); // Our current index should still be 0, as we only looked ahead; we are still // officially on the '('. - EXPECT_EQ(lexer.GetCurrentTokenIdx(), (uint32_t)0); + EXPECT_EQ(lexer.GetCurrentTokenIdx(), 0u); // Accept the 'lookahead', so our current token is '::', which has the index // 4 in our vector of tokens (which starts at zero). 
lexer.Advance(4); token = lexer.GetCurrentToken(); - EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::coloncolon); - EXPECT_EQ(lexer.GetCurrentTokenIdx(), (uint32_t)4); + EXPECT_EQ(token.GetKind(), Token::coloncolon); + EXPECT_EQ(lexer.GetCurrentTokenIdx(), 4u); lexer.Advance(); token = lexer.GetCurrentToken(); - EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier); + EXPECT_EQ(token.GetKind(), Token::identifier); EXPECT_EQ(token.GetSpelling(), "some_var"); - EXPECT_EQ(lexer.GetCurrentTokenIdx(), (uint32_t)5); - // Verify we've advanced our position counter (lexing location) in the - // input 23 characters (the length of '(anonymous namespace)::'. - EXPECT_EQ(token.GetLocation(), expect_loc); + EXPECT_EQ(lexer.GetCurrentTokenIdx(), 5u); + EXPECT_EQ(token.GetLocation(), strlen("(anonymous namespace)::")); lexer.Advance(); token = lexer.GetCurrentToken(); - EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof); + EXPECT_EQ(token.GetKind(), Token::eof); } TEST(DILLexerTests, MultiTokenLexTest) { - StringRef input_expr("This string has (several ) ::identifiers"); - llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer = - lldb_private::dil::DILLexer::Create(input_expr); - ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded()); - lldb_private::dil::DILLexer lexer(*maybe_lexer); - lldb_private::dil::Token token = - lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0); - - std::vector<std::pair<lldb_private::dil::Token::Kind, std::string>> - lexer_tokens_data = ExtractTokenData(lexer); - - EXPECT_THAT( - lexer_tokens_data, - testing::ElementsAre( - testing::Pair(lldb_private::dil::Token::identifier, "This"), - testing::Pair(lldb_private::dil::Token::identifier, "string"), - testing::Pair(lldb_private::dil::Token::identifier, "has"), - testing::Pair(lldb_private::dil::Token::l_paren, "("), - testing::Pair(lldb_private::dil::Token::identifier, "several"), - testing::Pair(lldb_private::dil::Token::r_paren, ")"), - 
testing::Pair(lldb_private::dil::Token::coloncolon, "::"), - testing::Pair(lldb_private::dil::Token::identifier, "identifiers"), - testing::Pair(lldb_private::dil::Token::eof, ""))); - lexer.Advance(); - token = lexer.GetCurrentToken(); - EXPECT_EQ(token.GetSpelling(), ""); - EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::eof); + EXPECT_THAT_EXPECTED( + ExtractTokenData("This string has (several ) ::identifiers"), + llvm::HasValue( + testing::ElementsAre(testing::Pair(Token::identifier, "This"), + testing::Pair(Token::identifier, "string"), + testing::Pair(Token::identifier, "has"), + testing::Pair(Token::l_paren, "("), + testing::Pair(Token::identifier, "several"), + testing::Pair(Token::r_paren, ")"), + testing::Pair(Token::coloncolon, "::"), + testing::Pair(Token::identifier, "identifiers"), + testing::Pair(Token::eof, "")))); } TEST(DILLexerTests, IdentifiersTest) { std::vector<std::string> valid_identifiers = { - "$My_name1", "$pc", "abcd", "ab cd", "_", "_a", "_a_", - "a_b", "this", "self", "a", "MyName", "namespace"}; + "$My_name1", "$pc", "abcd", "_", "_a", "_a_", + "a_b", "this", "self", "a", "MyName", "namespace"}; std::vector<std::string> invalid_identifiers = {"234", "2a", "2", "$", "1MyName", ""}; // Verify that all of the valid identifiers come out as identifier tokens. 
for (auto &str : valid_identifiers) { SCOPED_TRACE(str); - llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer = - lldb_private::dil::DILLexer::Create(str); - ASSERT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded()); - lldb_private::dil::DILLexer lexer(*maybe_lexer); - lldb_private::dil::Token token = - lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0); - lexer.Advance(); - token = lexer.GetCurrentToken(); - EXPECT_EQ(token.GetKind(), lldb_private::dil::Token::identifier); + EXPECT_THAT_EXPECTED(ExtractTokenData(str), + llvm::HasValue(testing::ElementsAre( + testing::Pair(Token::identifier, str), + testing::Pair(Token::eof, "")))); } // Verify that none of the invalid identifiers come out as identifier tokens. for (auto &str : invalid_identifiers) { SCOPED_TRACE(str); - llvm::Expected<lldb_private::dil::DILLexer> maybe_lexer = - lldb_private::dil::DILLexer::Create(str); + llvm::Expected<DILLexer> maybe_lexer = DILLexer::Create(str); if (!maybe_lexer) { llvm::consumeError(maybe_lexer.takeError()); // In this case, it's ok for lexing to return an error. } else { - lldb_private::dil::DILLexer lexer(*maybe_lexer); - lldb_private::dil::Token token = - lldb_private::dil::Token(lldb_private::dil::Token::unknown, "", 0); + DILLexer lexer(*maybe_lexer); + Token token = Token(Token::unknown, "", 0); // We didn't get an error; make sure we did not get an identifier token. lexer.Advance(); token = lexer.GetCurrentToken(); - EXPECT_TRUE(token.IsNot(lldb_private::dil::Token::identifier)); - EXPECT_TRUE(token.IsOneOf(lldb_private::dil::Token::unknown, - lldb_private::dil::Token::eof)); + EXPECT_TRUE(token.IsNot(Token::identifier)); + EXPECT_TRUE(token.IsOneOf(Token::unknown, Token::eof)); } } } _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits