Author: Ilia Kuklin Date: 2025-05-22T16:15:11+05:00 New Revision: 491619a25003c499be16708562206f9a81bff5f7
URL: https://github.com/llvm/llvm-project/commit/491619a25003c499be16708562206f9a81bff5f7 DIFF: https://github.com/llvm/llvm-project/commit/491619a25003c499be16708562206f9a81bff5f7.diff LOG: [LLDB] Add array subscription and integer parsing to DIL (#138551) Added: lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/Makefile lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/main.cpp Modified: lldb/docs/dil-expr-lang.ebnf lldb/include/lldb/ValueObject/DILAST.h lldb/include/lldb/ValueObject/DILEval.h lldb/include/lldb/ValueObject/DILLexer.h lldb/include/lldb/ValueObject/DILParser.h lldb/source/ValueObject/DILAST.cpp lldb/source/ValueObject/DILEval.cpp lldb/source/ValueObject/DILLexer.cpp lldb/source/ValueObject/DILParser.cpp lldb/unittests/ValueObject/DILLexerTests.cpp Removed: ################################################################################ diff --git a/lldb/docs/dil-expr-lang.ebnf b/lldb/docs/dil-expr-lang.ebnf index c8bf4231b3e4a..d54f65df15865 100644 --- a/lldb/docs/dil-expr-lang.ebnf +++ b/lldb/docs/dil-expr-lang.ebnf @@ -6,16 +6,19 @@ expression = unary_expression ; unary_expression = unary_operator expression - | primary_expression ; + | postfix_expression ; unary_operator = "*" | "&" ; +postfix_expression = primary_expression + | postfix_expression "[" integer_literal "]"; + primary_expression = id_expression | "(" expression ")"; id_expression = unqualified_id | qualified_id - | register ; + | register ; unqualified_id = identifier ; @@ -24,6 +27,8 @@ qualified_id = ["::"] [nested_name_specifier] unqualified_id identifier = ? C99 Identifier ? ; +integer_literal = ? Integer constant: hexadecimal, decimal, octal, binary ? ; + register = "$" ? Register name ? 
; nested_name_specifier = type_name "::" diff --git a/lldb/include/lldb/ValueObject/DILAST.h b/lldb/include/lldb/ValueObject/DILAST.h index fe3827ef0516a..c53fafefe2a0a 100644 --- a/lldb/include/lldb/ValueObject/DILAST.h +++ b/lldb/include/lldb/ValueObject/DILAST.h @@ -18,8 +18,10 @@ namespace lldb_private::dil { /// The various types DIL AST nodes (used by the DIL parser). enum class NodeKind { + eArraySubscriptNode, eErrorNode, eIdentifierNode, + eScalarLiteralNode, eUnaryOpNode, }; @@ -96,8 +98,8 @@ class UnaryOpNode : public ASTNode { llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override; - UnaryOpKind kind() const { return m_kind; } - ASTNode *operand() const { return m_operand.get(); } + UnaryOpKind GetKind() const { return m_kind; } + ASTNode *GetOperand() const { return m_operand.get(); } static bool classof(const ASTNode *node) { return node->GetKind() == NodeKind::eUnaryOpNode; @@ -108,6 +110,26 @@ class UnaryOpNode : public ASTNode { ASTNodeUP m_operand; }; +class ArraySubscriptNode : public ASTNode { +public: + ArraySubscriptNode(uint32_t location, ASTNodeUP base, int64_t index) + : ASTNode(location, NodeKind::eArraySubscriptNode), + m_base(std::move(base)), m_index(index) {} + + llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override; + + ASTNode *GetBase() const { return m_base.get(); } + int64_t GetIndex() const { return m_index; } + + static bool classof(const ASTNode *node) { + return node->GetKind() == NodeKind::eArraySubscriptNode; + } + +private: + ASTNodeUP m_base; + int64_t m_index; +}; + /// This class contains one Visit method for each specialized type of /// DIL AST node. 
The Visit methods are used to dispatch a DIL AST node to /// the correct function in the DIL expression evaluator for evaluating that @@ -119,6 +141,8 @@ class Visitor { Visit(const IdentifierNode *node) = 0; virtual llvm::Expected<lldb::ValueObjectSP> Visit(const UnaryOpNode *node) = 0; + virtual llvm::Expected<lldb::ValueObjectSP> + Visit(const ArraySubscriptNode *node) = 0; }; } // namespace lldb_private::dil diff --git a/lldb/include/lldb/ValueObject/DILEval.h b/lldb/include/lldb/ValueObject/DILEval.h index b1dd3fdb49739..03f869297c18f 100644 --- a/lldb/include/lldb/ValueObject/DILEval.h +++ b/lldb/include/lldb/ValueObject/DILEval.h @@ -50,6 +50,8 @@ class Interpreter : Visitor { llvm::Expected<lldb::ValueObjectSP> Visit(const IdentifierNode *node) override; llvm::Expected<lldb::ValueObjectSP> Visit(const UnaryOpNode *node) override; + llvm::Expected<lldb::ValueObjectSP> + Visit(const ArraySubscriptNode *node) override; // Used by the interpreter to create objects, perform casts, etc. 
lldb::TargetSP m_target; diff --git a/lldb/include/lldb/ValueObject/DILLexer.h b/lldb/include/lldb/ValueObject/DILLexer.h index 3508b8b7a85c6..0176db73835e9 100644 --- a/lldb/include/lldb/ValueObject/DILLexer.h +++ b/lldb/include/lldb/ValueObject/DILLexer.h @@ -29,7 +29,10 @@ class Token { eof, identifier, l_paren, + l_square, + numeric_constant, r_paren, + r_square, star, }; diff --git a/lldb/include/lldb/ValueObject/DILParser.h b/lldb/include/lldb/ValueObject/DILParser.h index f5c00b1040ef4..2df9678173870 100644 --- a/lldb/include/lldb/ValueObject/DILParser.h +++ b/lldb/include/lldb/ValueObject/DILParser.h @@ -84,12 +84,14 @@ class DILParser { ASTNodeUP ParseExpression(); ASTNodeUP ParseUnaryExpression(); + ASTNodeUP ParsePostfixExpression(); ASTNodeUP ParsePrimaryExpression(); std::string ParseNestedNameSpecifier(); std::string ParseIdExpression(); std::string ParseUnqualifiedId(); + std::optional<int64_t> ParseIntegerConstant(); void BailOut(const std::string &error, uint32_t loc, uint16_t err_len); diff --git a/lldb/source/ValueObject/DILAST.cpp b/lldb/source/ValueObject/DILAST.cpp index ea847587501ee..330b5a3f3c586 100644 --- a/lldb/source/ValueObject/DILAST.cpp +++ b/lldb/source/ValueObject/DILAST.cpp @@ -23,4 +23,9 @@ llvm::Expected<lldb::ValueObjectSP> UnaryOpNode::Accept(Visitor *v) const { return v->Visit(this); } +llvm::Expected<lldb::ValueObjectSP> +ArraySubscriptNode::Accept(Visitor *v) const { + return v->Visit(this); +} + } // namespace lldb_private::dil diff --git a/lldb/source/ValueObject/DILEval.cpp b/lldb/source/ValueObject/DILEval.cpp index 15a66d4866305..da3f9969fce2b 100644 --- a/lldb/source/ValueObject/DILEval.cpp +++ b/lldb/source/ValueObject/DILEval.cpp @@ -237,13 +237,13 @@ Interpreter::Visit(const IdentifierNode *node) { llvm::Expected<lldb::ValueObjectSP> Interpreter::Visit(const UnaryOpNode *node) { Status error; - auto rhs_or_err = Evaluate(node->operand()); + auto rhs_or_err = Evaluate(node->GetOperand()); if (!rhs_or_err) return 
rhs_or_err; lldb::ValueObjectSP rhs = *rhs_or_err; - switch (node->kind()) { + switch (node->GetKind()) { case UnaryOpKind::Deref: { lldb::ValueObjectSP dynamic_rhs = rhs->GetDynamicValue(m_default_dynamic); if (dynamic_rhs) @@ -272,4 +272,51 @@ Interpreter::Visit(const UnaryOpNode *node) { m_expr, "invalid ast: unexpected binary operator", node->GetLocation()); } +llvm::Expected<lldb::ValueObjectSP> +Interpreter::Visit(const ArraySubscriptNode *node) { + auto lhs_or_err = Evaluate(node->GetBase()); + if (!lhs_or_err) + return lhs_or_err; + lldb::ValueObjectSP base = *lhs_or_err; + + // Check to see if 'base' has a synthetic value; if so, try using that. + uint64_t child_idx = node->GetIndex(); + if (lldb::ValueObjectSP synthetic = base->GetSyntheticValue()) { + llvm::Expected<uint32_t> num_children = + synthetic->GetNumChildren(child_idx + 1); + if (!num_children) + return llvm::make_error<DILDiagnosticError>( + m_expr, toString(num_children.takeError()), node->GetLocation()); + if (child_idx >= *num_children) { + std::string message = llvm::formatv( + "array index {0} is not valid for \"({1}) {2}\"", child_idx, + base->GetTypeName().AsCString("<invalid type>"), + base->GetName().AsCString()); + return llvm::make_error<DILDiagnosticError>(m_expr, message, + node->GetLocation()); + } + if (lldb::ValueObjectSP child_valobj_sp = + synthetic->GetChildAtIndex(child_idx)) + return child_valobj_sp; + } + + auto base_type = base->GetCompilerType().GetNonReferenceType(); + if (!base_type.IsPointerType() && !base_type.IsArrayType()) + return llvm::make_error<DILDiagnosticError>( + m_expr, "subscripted value is not an array or pointer", + node->GetLocation()); + if (base_type.IsPointerToVoid()) + return llvm::make_error<DILDiagnosticError>( + m_expr, "subscript of pointer to incomplete type 'void'", + node->GetLocation()); + + if (base_type.IsArrayType()) { + if (lldb::ValueObjectSP child_valobj_sp = base->GetChildAtIndex(child_idx)) + return child_valobj_sp; + } + + int64_t 
signed_child_idx = node->GetIndex(); + return base->GetSyntheticArrayMember(signed_child_idx, true); +} + } // namespace lldb_private::dil diff --git a/lldb/source/ValueObject/DILLexer.cpp b/lldb/source/ValueObject/DILLexer.cpp index b9c2e7971e3b4..449feb7dee55b 100644 --- a/lldb/source/ValueObject/DILLexer.cpp +++ b/lldb/source/ValueObject/DILLexer.cpp @@ -13,6 +13,7 @@ #include "lldb/ValueObject/DILLexer.h" #include "lldb/Utility/Status.h" +#include "lldb/ValueObject/DILParser.h" #include "llvm/ADT/StringSwitch.h" namespace lldb_private::dil { @@ -29,8 +30,14 @@ llvm::StringRef Token::GetTokenName(Kind kind) { return "identifier"; case Kind::l_paren: return "l_paren"; + case Kind::l_square: + return "l_square"; + case Kind::numeric_constant: + return "numeric_constant"; case Kind::r_paren: return "r_paren"; + case Kind::r_square: + return "r_square"; case Token::star: return "star"; } @@ -57,6 +64,18 @@ static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr, return candidate; } +static bool IsNumberBodyChar(char ch) { return IsDigit(ch) || IsLetter(ch); } + +static std::optional<llvm::StringRef> IsNumber(llvm::StringRef expr, + llvm::StringRef &remainder) { + if (IsDigit(remainder[0])) { + llvm::StringRef number = remainder.take_while(IsNumberBodyChar); + remainder = remainder.drop_front(number.size()); + return number; + } + return std::nullopt; +} + llvm::Expected<DILLexer> DILLexer::Create(llvm::StringRef expr) { std::vector<Token> tokens; llvm::StringRef remainder = expr; @@ -81,13 +100,17 @@ llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr, return Token(Token::eof, "", (uint32_t)expr.size()); uint32_t position = cur_pos - expr.begin(); + std::optional<llvm::StringRef> maybe_number = IsNumber(expr, remainder); + if (maybe_number) + return Token(Token::numeric_constant, maybe_number->str(), position); std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder); if (maybe_word) return Token(Token::identifier, maybe_word->str(), 
position); constexpr std::pair<Token::Kind, const char *> operators[] = { - {Token::amp, "&"}, {Token::coloncolon, "::"}, {Token::l_paren, "("}, - {Token::r_paren, ")"}, {Token::star, "*"}, + {Token::amp, "&"}, {Token::coloncolon, "::"}, {Token::l_paren, "("}, + {Token::l_square, "["}, {Token::r_paren, ")"}, {Token::r_square, "]"}, + {Token::star, "*"}, }; for (auto [kind, str] : operators) { if (remainder.consume_front(str)) @@ -95,7 +118,8 @@ llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr, } // Unrecognized character(s) in string; unable to lex it. - return llvm::createStringError("Unable to lex input string"); + return llvm::make_error<DILDiagnosticError>(expr, "unrecognized token", + position); } } // namespace lldb_private::dil diff --git a/lldb/source/ValueObject/DILParser.cpp b/lldb/source/ValueObject/DILParser.cpp index 2c78eae8cf6bf..f310538617589 100644 --- a/lldb/source/ValueObject/DILParser.cpp +++ b/lldb/source/ValueObject/DILParser.cpp @@ -111,7 +111,42 @@ ASTNodeUP DILParser::ParseUnaryExpression() { llvm_unreachable("invalid token kind"); } } - return ParsePrimaryExpression(); + return ParsePostfixExpression(); +} + +// Parse a postfix_expression. 
+// +// postfix_expression: +// primary_expression +// postfix_expression "[" integer_literal "]" +// +ASTNodeUP DILParser::ParsePostfixExpression() { + ASTNodeUP lhs = ParsePrimaryExpression(); + while (CurToken().Is(Token::l_square)) { + uint32_t loc = CurToken().GetLocation(); + Token token = CurToken(); + switch (token.GetKind()) { + case Token::l_square: { + m_dil_lexer.Advance(); + std::optional<int64_t> rhs = ParseIntegerConstant(); + if (!rhs) { + BailOut( + llvm::formatv("failed to parse integer constant: {0}", CurToken()), + CurToken().GetLocation(), CurToken().GetSpelling().length()); + return std::make_unique<ErrorNode>(); + } + Expect(Token::r_square); + m_dil_lexer.Advance(); + lhs = std::make_unique<ArraySubscriptNode>(loc, std::move(lhs), + std::move(*rhs)); + break; + } + default: + llvm_unreachable("invalid token"); + } + } + + return lhs; } // Parse a primary_expression. @@ -280,6 +315,23 @@ void DILParser::BailOut(const std::string &error, uint32_t loc, m_dil_lexer.ResetTokenIdx(m_dil_lexer.NumLexedTokens() - 1); } +// Parse an integer_literal. +// +// integer_literal: +// ? Integer constant ? 
+// +std::optional<int64_t> DILParser::ParseIntegerConstant() { + auto spelling = CurToken().GetSpelling(); + llvm::StringRef spelling_ref = spelling; + int64_t raw_value; + if (!spelling_ref.getAsInteger<int64_t>(0, raw_value)) { + m_dil_lexer.Advance(); + return raw_value; + } + + return std::nullopt; +} + void DILParser::Expect(Token::Kind kind) { if (CurToken().IsNot(kind)) { BailOut(llvm::formatv("expected {0}, got: {1}", kind, CurToken()), diff --git a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/Makefile b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/Makefile new file mode 100644 index 0000000000000..99998b20bcb05 --- /dev/null +++ b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py new file mode 100644 index 0000000000000..9cf23c5aa0fd7 --- /dev/null +++ b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py @@ -0,0 +1,110 @@ +""" +Test DIL array subscript. 
+""" + +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from lldbsuite.test import lldbutil + + +class TestFrameVarDILGlobalVariableLookup(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def expect_var_path(self, expr, compare_to_framevar=False, value=None, type=None): + value_dil = super().expect_var_path(expr, value=value, type=type) + if compare_to_framevar: + self.runCmd("settings set target.experimental.use-DIL false") + value_frv = super().expect_var_path(expr, value=value, type=type) + self.runCmd("settings set target.experimental.use-DIL true") + self.assertEqual(value_dil.GetValue(), value_frv.GetValue()) + + def test_dereference(self): + self.build() + lldbutil.run_to_source_breakpoint( + self, "Set a breakpoint here", lldb.SBFileSpec("main.cpp") + ) + + self.runCmd("settings set target.experimental.use-DIL true") + + # Test int[] and int* + self.expect_var_path("int_arr[0]", True, value="1") + self.expect_var_path("int_ptr[1]", True, value="2") + self.expect("frame var 'int_arr[enum_one]'", error=True) + + # Test when base and index are references. + self.expect_var_path("int_arr[0]", True, value="1") + self.expect("frame var 'int_arr[idx_1_ref]'", error=True) + self.expect("frame var 'int_arr[enum_ref]'", error=True) + self.expect_var_path("int_arr_ref[0]", value="1") + self.expect("frame var 'int_arr_ref[idx_1_ref]'", error=True) + self.expect("frame var 'int_arr_ref[enum_ref]'", error=True) + + # Test when base and index are typedefs. 
+ self.expect_var_path("td_int_arr[0]", True, value="1") + self.expect("frame var 'td_int_arr[td_int_idx_1]'", error=True) + self.expect("frame var 'td_int_arr[td_td_int_idx_2]'", error=True) + self.expect_var_path("td_int_ptr[0]", True, value="1") + self.expect("frame var 'td_int_ptr[td_int_idx_1]'", error=True) + self.expect("frame var 'td_int_ptr[td_td_int_idx_2]'", error=True) + + # Both typedefs and refs + self.expect("frame var 'td_int_arr_ref[td_int_idx_1_ref]'", error=True) + + # Test for index out of bounds. + self.expect_var_path("int_arr[42]", True, type="int") + self.expect_var_path("int_arr[100]", True, type="int") + + # Test address-of of the subscripted value. + self.expect_var_path("*(&int_arr[1])", value="2") + + # Test for negative index. + self.expect( + "frame var 'int_arr[-1]'", + error=True, + substrs=["unrecognized token"], + ) + + # Test synthetic value subscription + self.expect_var_path("vector[1]", value="2") + self.expect( + "frame var 'vector[100]'", + error=True, + substrs=["array index 100 is not valid"], + ) + + # Test for floating point index + self.expect( + "frame var 'int_arr[1.0]'", + error=True, + substrs=["unrecognized token"], + ) + + # Base should be a "pointer to T" and index should be of an integral type. 
+ self.expect( + "frame var 'idx_1[0]'", + error=True, + substrs=["subscripted value is not an array or pointer"], + ) + self.expect( + "frame var 'idx_1_ref[0]'", + error=True, + substrs=["subscripted value is not an array or pointer"], + ) + self.expect( + "frame var 'int_arr[int_ptr]'", + error=True, + substrs=["failed to parse integer constant"], + ) + self.expect( + "frame var '1[2]'", + error=True, + substrs=["Unexpected token"], + ) + + # Base should not be a pointer to void + self.expect( + "frame var 'p_void[0]'", + error=True, + substrs=["subscript of pointer to incomplete type 'void'"], + ) diff --git a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/main.cpp b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/main.cpp new file mode 100644 index 0000000000000..485666ae46c20 --- /dev/null +++ b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/main.cpp @@ -0,0 +1,32 @@ +#include <vector> + +int main(int argc, char **argv) { + int int_arr[] = {1, 2, 3}; + int *int_ptr = int_arr; + int(&int_arr_ref)[3] = int_arr; + void *p_void = (void *)int_arr; + + int idx_1 = 1; + const int &idx_1_ref = idx_1; + + typedef int td_int_t; + typedef td_int_t td_td_int_t; + typedef int *td_int_ptr_t; + typedef int &td_int_ref_t; + + td_int_t td_int_idx_1 = 1; + td_td_int_t td_td_int_idx_2 = 2; + + td_int_t td_int_arr[3] = {1, 2, 3}; + td_int_ptr_t td_int_ptr = td_int_arr; + + td_int_ref_t td_int_idx_1_ref = td_int_idx_1; + td_int_t(&td_int_arr_ref)[3] = td_int_arr; + + enum Enum { kZero, kOne } enum_one = kOne; + Enum &enum_ref = enum_one; + + std::vector<int> vector = {1, 2, 3}; + + return 0; // Set a breakpoint here +} diff --git a/lldb/unittests/ValueObject/DILLexerTests.cpp b/lldb/unittests/ValueObject/DILLexerTests.cpp index 9afa957901ae7..f65034c1dbea3 100644 --- a/lldb/unittests/ValueObject/DILLexerTests.cpp +++ b/lldb/unittests/ValueObject/DILLexerTests.cpp @@ -121,11 +121,11 @@ TEST(DILLexerTests, IdentifiersTest) { "a_b", "this", "self", 
"a", "MyName", "namespace"}; // The lexer can lex these strings, but they should not be identifiers. - std::vector<std::string> invalid_identifiers = {"", "::", "(", ")"}; + std::vector<std::string> invalid_identifiers = {"", "::", "(", ")", "0abc"}; // The lexer is expected to fail attempting to lex these strings (it cannot // create valid tokens out of them). - std::vector<std::string> invalid_tok_strings = {"234", "2a", "2", "1MyName"}; + std::vector<std::string> invalid_tok_strings = {"#include", "a@a"}; // Verify that all of the valid identifiers come out as identifier tokens. for (auto &str : valid_identifiers) { @@ -150,7 +150,32 @@ TEST(DILLexerTests, IdentifiersTest) { DILLexer lexer(*maybe_lexer); Token token = lexer.GetCurrentToken(); EXPECT_TRUE(token.IsNot(Token::identifier)); - EXPECT_TRUE(token.IsOneOf( - {Token::eof, Token::coloncolon, Token::l_paren, Token::r_paren})); + EXPECT_TRUE(token.IsOneOf({Token::eof, Token::coloncolon, Token::l_paren, + Token::r_paren, Token::numeric_constant})); + } +} + +TEST(DILLexerTests, NumbersTest) { + // These strings should lex into number tokens. + std::vector<std::string> valid_numbers = {"123", "0x123", "0123", "0b101"}; + + // The lexer can lex these strings, but they should not be numbers. + std::vector<std::string> invalid_numbers = {"", "x123", "b123"}; + + for (auto &str : valid_numbers) { + SCOPED_TRACE(str); + EXPECT_THAT_EXPECTED(ExtractTokenData(str), + llvm::HasValue(testing::ElementsAre( + testing::Pair(Token::numeric_constant, str)))); + } + // Verify that none of the invalid numbers come out as numeric tokens. 
+ for (auto &str : invalid_numbers) { + SCOPED_TRACE(str); + llvm::Expected<DILLexer> maybe_lexer = DILLexer::Create(str); + EXPECT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded()); + DILLexer lexer(*maybe_lexer); + Token token = lexer.GetCurrentToken(); + EXPECT_TRUE(token.IsNot(Token::numeric_constant)); + EXPECT_TRUE(token.IsOneOf({Token::eof, Token::identifier})); } } _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits