[Lldb-commits] [lldb] [llvm] [LLDB] Add `ScalarLiteralNode` and literal parsing in DIL (PR #152308)

Ilia Kuklin via lldb-commits Wed, 06 Aug 2025 06:51:42 -0700

https://github.com/kuilpd created 
https://github.com/llvm/llvm-project/pull/152308


This patch introduces `ScalarLiteralNode`, which is not yet used by any other 
node. It also adds lexing and parsing of integer and floating-point literals, 
and makes the `getAutoSenseRadix` function in `StringRef` globally accessible.


>From 330a34ed85f80563b9d6f204b59e1f3172c9f09a Mon Sep 17 00:00:00 2001
From: Ilia Kuklin <ikuk...@accesssoftek.com>
Date: Fri, 1 Aug 2025 20:14:59 +0500
Subject: [PATCH 1/3] Add ScalarLiteralNode

---
 lldb/include/lldb/ValueObject/DILAST.h        | 23 +++++++++
 lldb/include/lldb/ValueObject/DILEval.h       |  2 +
 lldb/include/lldb/ValueObject/DILParser.h     |  2 +
 lldb/source/ValueObject/DILAST.cpp            |  5 ++
 lldb/source/ValueObject/DILEval.cpp           | 28 +++++++++++
 lldb/source/ValueObject/DILParser.cpp         | 48 +++++++++++++++++++
 .../TestFrameVarDILArraySubscript.py          |  2 +-
 .../Indirection/TestFrameVarDILIndirection.py |  2 +-
 8 files changed, 110 insertions(+), 2 deletions(-)

diff --git a/lldb/include/lldb/ValueObject/DILAST.h 
b/lldb/include/lldb/ValueObject/DILAST.h
index 709f0639135f1..a174e28ea5c06 100644
--- a/lldb/include/lldb/ValueObject/DILAST.h
+++ b/lldb/include/lldb/ValueObject/DILAST.h
@@ -23,6 +23,7 @@ enum class NodeKind {
   eErrorNode,
   eIdentifierNode,
   eMemberOfNode,
+  eScalarLiteralNode,
   eUnaryOpNode,
 };
 
@@ -178,6 +179,26 @@ class BitFieldExtractionNode : public ASTNode {
   int64_t m_last_index;
 };
 
+class ScalarLiteralNode : public ASTNode {
+public:
+  ScalarLiteralNode(uint32_t location, lldb::BasicType type, Scalar value)
+      : ASTNode(location, NodeKind::eScalarLiteralNode), m_type(type),
+        m_value(value) {}
+
+  llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override;
+
+  lldb::BasicType GetType() const { return m_type; }
+  Scalar GetValue() const & { return m_value; }
+
+  static bool classof(const ASTNode *node) {
+    return node->GetKind() == NodeKind::eScalarLiteralNode;
+  }
+
+private:
+  lldb::BasicType m_type;
+  Scalar m_value;
+};
+
 /// This class contains one Visit method for each specialized type of
 /// DIL AST node. The Visit methods are used to dispatch a DIL AST node to
 /// the correct function in the DIL expression evaluator for evaluating that
@@ -195,6 +216,8 @@ class Visitor {
   Visit(const ArraySubscriptNode *node) = 0;
   virtual llvm::Expected<lldb::ValueObjectSP>
   Visit(const BitFieldExtractionNode *node) = 0;
+  virtual llvm::Expected<lldb::ValueObjectSP>
+  Visit(const ScalarLiteralNode *node) = 0;
 };
 
 } // namespace lldb_private::dil
diff --git a/lldb/include/lldb/ValueObject/DILEval.h 
b/lldb/include/lldb/ValueObject/DILEval.h
index 45e29b3ddcd7b..cb2a81d1c7ba1 100644
--- a/lldb/include/lldb/ValueObject/DILEval.h
+++ b/lldb/include/lldb/ValueObject/DILEval.h
@@ -54,6 +54,8 @@ class Interpreter : Visitor {
   Visit(const ArraySubscriptNode *node) override;
   llvm::Expected<lldb::ValueObjectSP>
   Visit(const BitFieldExtractionNode *node) override;
+  llvm::Expected<lldb::ValueObjectSP>
+  Visit(const ScalarLiteralNode *node) override;
 
   // Used by the interpreter to create objects, perform casts, etc.
   lldb::TargetSP m_target;
diff --git a/lldb/include/lldb/ValueObject/DILParser.h 
b/lldb/include/lldb/ValueObject/DILParser.h
index 9eda7bac4a364..2cd8ca3be3c02 100644
--- a/lldb/include/lldb/ValueObject/DILParser.h
+++ b/lldb/include/lldb/ValueObject/DILParser.h
@@ -96,6 +96,8 @@ class DILParser {
   std::string ParseIdExpression();
   std::string ParseUnqualifiedId();
   std::optional<int64_t> ParseIntegerConstant();
+  ASTNodeUP ParseNumericLiteral();
+  ASTNodeUP ParseNumericConstant();
 
   void BailOut(const std::string &error, uint32_t loc, uint16_t err_len);
 
diff --git a/lldb/source/ValueObject/DILAST.cpp 
b/lldb/source/ValueObject/DILAST.cpp
index b1cd824c2299e..38215ae18f6ce 100644
--- a/lldb/source/ValueObject/DILAST.cpp
+++ b/lldb/source/ValueObject/DILAST.cpp
@@ -37,4 +37,9 @@ BitFieldExtractionNode::Accept(Visitor *v) const {
   return v->Visit(this);
 }
 
+llvm::Expected<lldb::ValueObjectSP>
+ScalarLiteralNode::Accept(Visitor *v) const {
+  return v->Visit(this);
+}
+
 } // namespace lldb_private::dil
diff --git a/lldb/source/ValueObject/DILEval.cpp 
b/lldb/source/ValueObject/DILEval.cpp
index 6f28434c646cd..18cc30d589829 100644
--- a/lldb/source/ValueObject/DILEval.cpp
+++ b/lldb/source/ValueObject/DILEval.cpp
@@ -7,7 +7,9 @@
 
//===----------------------------------------------------------------------===//
 
 #include "lldb/ValueObject/DILEval.h"
+#include "lldb/Core/Module.h"
 #include "lldb/Symbol/CompileUnit.h"
+#include "lldb/Symbol/TypeSystem.h"
 #include "lldb/Symbol/VariableList.h"
 #include "lldb/Target/RegisterContext.h"
 #include "lldb/ValueObject/DILAST.h"
@@ -402,4 +404,30 @@ Interpreter::Visit(const BitFieldExtractionNode *node) {
   return child_valobj_sp;
 }
 
+static CompilerType GetBasicTypeFromCU(std::shared_ptr<StackFrame> ctx,
+                                       lldb::BasicType basic_type) {
+  SymbolContext symbol_context =
+      ctx->GetSymbolContext(lldb::eSymbolContextCompUnit);
+  auto language = symbol_context.comp_unit->GetLanguage();
+
+  symbol_context = ctx->GetSymbolContext(lldb::eSymbolContextModule);
+  auto type_system =
+      symbol_context.module_sp->GetTypeSystemForLanguage(language);
+
+  if (type_system)
+    if (auto compiler_type = type_system.get()->GetBasicTypeFromAST(basic_type))
+      return compiler_type;
+
+  return CompilerType();
+}
+
+llvm::Expected<lldb::ValueObjectSP>
+Interpreter::Visit(const ScalarLiteralNode *node) {
+  CompilerType result_type =
+      GetBasicTypeFromCU(m_exe_ctx_scope, node->GetType());
+  Scalar value = node->GetValue();
+  return ValueObject::CreateValueObjectFromScalar(m_target, value, result_type,
+                                                  "result");
+}
+
 } // namespace lldb_private::dil
diff --git a/lldb/source/ValueObject/DILParser.cpp 
b/lldb/source/ValueObject/DILParser.cpp
index eac41fab90763..91b9d764527b3 100644
--- a/lldb/source/ValueObject/DILParser.cpp
+++ b/lldb/source/ValueObject/DILParser.cpp
@@ -183,6 +183,8 @@ ASTNodeUP DILParser::ParsePostfixExpression() {
 //    "(" expression ")"
 //
 ASTNodeUP DILParser::ParsePrimaryExpression() {
+  if (CurToken().Is(Token::numeric_constant))
+    return ParseNumericLiteral();
   if (CurToken().IsOneOf(
           {Token::coloncolon, Token::identifier, Token::l_paren})) {
     // Save the source location for the diagnostics message.
@@ -370,6 +372,52 @@ std::optional<int64_t> DILParser::ParseIntegerConstant() {
   return std::nullopt;
 }
 
+// Parse a numeric_literal.
+//
+//  numeric_literal:
+//    ? Token::numeric_constant ?
+//
+ASTNodeUP DILParser::ParseNumericLiteral() {
+  Expect(Token::numeric_constant);
+  ASTNodeUP numeric_constant = ParseNumericConstant();
+  if (numeric_constant->GetKind() == NodeKind::eErrorNode) {
+    BailOut(llvm::formatv("Failed to parse token as numeric-constant: {0}",
+                          CurToken()),
+            CurToken().GetLocation(), CurToken().GetSpelling().length());
+    return std::make_unique<ErrorNode>();
+  }
+  m_dil_lexer.Advance();
+  return numeric_constant;
+}
+
+static constexpr std::pair<const char *, lldb::BasicType> type_suffixes[] = {
+    {"ull", lldb::eBasicTypeUnsignedLongLong},
+    {"ul", lldb::eBasicTypeUnsignedLong},
+    {"u", lldb::eBasicTypeUnsignedInt},
+    {"ll", lldb::eBasicTypeLongLong},
+    {"l", lldb::eBasicTypeLong},
+};
+
+ASTNodeUP DILParser::ParseNumericConstant() {
+  Token token = CurToken();
+  auto spelling = token.GetSpelling();
+  llvm::StringRef spelling_ref = spelling;
+  lldb::BasicType type = lldb::eBasicTypeInt;
+  for (auto [suffix, t] : type_suffixes) {
+    if (spelling_ref.consume_back_insensitive(suffix)) {
+      type = t;
+      break;
+    }
+  }
+  llvm::APInt raw_value;
+  if (!spelling_ref.getAsInteger(0, raw_value)) {
+    Scalar scalar_value(raw_value);
+    return std::make_unique<ScalarLiteralNode>(token.GetLocation(), type,
+                                               scalar_value);
+  }
+  return std::make_unique<ErrorNode>();
+}
+
 void DILParser::Expect(Token::Kind kind) {
   if (CurToken().IsNot(kind)) {
     BailOut(llvm::formatv("expected {0}, got: {1}", kind, CurToken()),
diff --git 
a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py
 
b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py
index 0f56057189395..c2ab049a28bc2 100644
--- 
a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py
+++ 
b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py
@@ -88,7 +88,7 @@ def test_subscript(self):
         self.expect(
             "frame var '1[2]'",
             error=True,
-            substrs=["Unexpected token"],
+            substrs=["subscripted value is not an array or pointer"],
         )
 
         # Base should not be a pointer to void
diff --git 
a/lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py
 
b/lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py
index 38c72131d797c..28eba4f1a70bc 100644
--- 
a/lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py
+++ 
b/lldb/test/API/commands/frame/var-dil/basics/Indirection/TestFrameVarDILIndirection.py
@@ -35,7 +35,7 @@ def test_frame_var(self):
         self.expect(
             "frame variable '*1'",
             error=True,
-            substrs=["Unexpected token: <'1' (numeric_constant)>"],
+            substrs=["dereference failed: not a pointer, reference or array type"],
         )
         self.expect(
             "frame variable '*val'",

>From 8a04a676f24666c238840f8e568b9276ad3a86f7 Mon Sep 17 00:00:00 2001
From: Ilia Kuklin <ikuk...@accesssoftek.com>
Date: Fri, 18 Jul 2025 21:52:53 +0500
Subject: [PATCH 2/3] Lex and parse floating numbers; split number tokens to
 avoid double parsing

---
 lldb/docs/dil-expr-lang.ebnf                  |  3 ++
 lldb/include/lldb/ValueObject/DILLexer.h      |  3 +-
 lldb/include/lldb/ValueObject/DILParser.h     |  3 +-
 lldb/source/ValueObject/DILLexer.cpp          | 54 +++++++++++++++----
 lldb/source/ValueObject/DILParser.cpp         | 37 +++++++++++--
 .../TestFrameVarDILArraySubscript.py          |  2 +-
 .../frame/var-dil/expr/Arithmetic/Makefile    |  3 ++
 .../Arithmetic/TestFrameVarDILArithmetic.py   | 30 +++++++++++
 .../frame/var-dil/expr/Arithmetic/main.cpp    |  3 ++
 lldb/unittests/ValueObject/DILLexerTests.cpp  | 21 +++++---
 10 files changed, 134 insertions(+), 25 deletions(-)
 create mode 100644 
lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile
 create mode 100644 
lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py
 create mode 100644 
lldb/test/API/commands/frame/var-dil/expr/Arithmetic/main.cpp

diff --git a/lldb/docs/dil-expr-lang.ebnf b/lldb/docs/dil-expr-lang.ebnf
index 783432dabd6db..da1796d936c6a 100644
--- a/lldb/docs/dil-expr-lang.ebnf
+++ b/lldb/docs/dil-expr-lang.ebnf
@@ -31,6 +31,9 @@ identifier = ? C99 Identifier ? ;
 
 integer_literal = ? Integer constant: hexademical, decimal, octal, binary ? ;
 
+numeric_literal = ? Integer constant: hexadecimal, decimal, octal, binary ?
+                | ? Floating constant ? ;
+
 register = "$" ? Register name ? ;
 
 nested_name_specifier = type_name "::"
diff --git a/lldb/include/lldb/ValueObject/DILLexer.h 
b/lldb/include/lldb/ValueObject/DILLexer.h
index 9c1ba97680253..a9f01785c6c20 100644
--- a/lldb/include/lldb/ValueObject/DILLexer.h
+++ b/lldb/include/lldb/ValueObject/DILLexer.h
@@ -28,11 +28,12 @@ class Token {
     arrow,
     coloncolon,
     eof,
+    floating_constant,
     identifier,
+    integer_constant,
     l_paren,
     l_square,
     minus,
-    numeric_constant,
     period,
     r_paren,
     r_square,
diff --git a/lldb/include/lldb/ValueObject/DILParser.h 
b/lldb/include/lldb/ValueObject/DILParser.h
index 2cd8ca3be3c02..90df109337dcf 100644
--- a/lldb/include/lldb/ValueObject/DILParser.h
+++ b/lldb/include/lldb/ValueObject/DILParser.h
@@ -97,7 +97,8 @@ class DILParser {
   std::string ParseUnqualifiedId();
   std::optional<int64_t> ParseIntegerConstant();
   ASTNodeUP ParseNumericLiteral();
-  ASTNodeUP ParseNumericConstant();
+  ASTNodeUP ParseIntegerLiteral();
+  ASTNodeUP ParseFloatingPointLiteral();
 
   void BailOut(const std::string &error, uint32_t loc, uint16_t err_len);
 
diff --git a/lldb/source/ValueObject/DILLexer.cpp 
b/lldb/source/ValueObject/DILLexer.cpp
index eaefaf484bc18..00f9a0c515461 100644
--- a/lldb/source/ValueObject/DILLexer.cpp
+++ b/lldb/source/ValueObject/DILLexer.cpp
@@ -28,16 +28,18 @@ llvm::StringRef Token::GetTokenName(Kind kind) {
     return "coloncolon";
   case Kind::eof:
     return "eof";
+  case Kind::floating_constant:
+    return "floating_constant";
   case Kind::identifier:
     return "identifier";
+  case Kind::integer_constant:
+    return "integer_constant";
   case Kind::l_paren:
     return "l_paren";
   case Kind::l_square:
     return "l_square";
   case Kind::minus:
     return "minus";
-  case Kind::numeric_constant:
-    return "numeric_constant";
   case Kind::period:
     return "period";
   case Kind::r_paren:
@@ -72,12 +74,39 @@ static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr,
 
 static bool IsNumberBodyChar(char ch) { return IsDigit(ch) || IsLetter(ch); }
 
-static std::optional<llvm::StringRef> IsNumber(llvm::StringRef expr,
-                                               llvm::StringRef &remainder) {
-  if (IsDigit(remainder[0])) {
-    llvm::StringRef number = remainder.take_while(IsNumberBodyChar);
-    remainder = remainder.drop_front(number.size());
-    return number;
+static std::optional<llvm::StringRef> IsNumber(llvm::StringRef &remainder,
+                                               bool &isFloat) {
+  llvm::StringRef::iterator cur_pos = remainder.begin();
+  if (*cur_pos == '.') {
+    auto next_pos = cur_pos + 1;
+    if (next_pos == remainder.end() || !IsDigit(*next_pos))
+      return std::nullopt;
+  }
+  if (IsDigit(*(cur_pos)) || *(cur_pos) == '.') {
+    while (IsNumberBodyChar(*cur_pos))
+      cur_pos++;
+
+    if (*cur_pos == '.') {
+      isFloat = true;
+      cur_pos++;
+      while (IsNumberBodyChar(*cur_pos))
+        cur_pos++;
+
+      // Check if there's an exponent
+      char prev_ch = *(cur_pos - 1);
+      if (prev_ch == 'e' || prev_ch == 'E' || prev_ch == 'p' ||
+          prev_ch == 'P') {
+        if (*(cur_pos) == '+' || *(cur_pos) == '-') {
+          cur_pos++;
+          while (IsNumberBodyChar(*cur_pos))
+            cur_pos++;
+        }
+      }
+    }
+
+    llvm::StringRef number = remainder.substr(0, cur_pos - remainder.begin());
+    if (remainder.consume_front(number))
+      return number;
   }
   return std::nullopt;
 }
@@ -106,9 +135,12 @@ llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
     return Token(Token::eof, "", (uint32_t)expr.size());
 
   uint32_t position = cur_pos - expr.begin();
-  std::optional<llvm::StringRef> maybe_number = IsNumber(expr, remainder);
-  if (maybe_number)
-    return Token(Token::numeric_constant, maybe_number->str(), position);
+  bool isFloat = false;
+  std::optional<llvm::StringRef> maybe_number = IsNumber(remainder, isFloat);
+  if (maybe_number) {
+    auto kind = isFloat ? Token::floating_constant : Token::integer_constant;
+    return Token(kind, maybe_number->str(), position);
+  }
   std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder);
   if (maybe_word)
     return Token(Token::identifier, maybe_word->str(), position);
diff --git a/lldb/source/ValueObject/DILParser.cpp 
b/lldb/source/ValueObject/DILParser.cpp
index 91b9d764527b3..eee4a7bc4d363 100644
--- a/lldb/source/ValueObject/DILParser.cpp
+++ b/lldb/source/ValueObject/DILParser.cpp
@@ -179,11 +179,12 @@ ASTNodeUP DILParser::ParsePostfixExpression() {
 // Parse a primary_expression.
 //
 //  primary_expression:
+//    numeric_literal
 //    id_expression
 //    "(" expression ")"
 //
 ASTNodeUP DILParser::ParsePrimaryExpression() {
-  if (CurToken().Is(Token::numeric_constant))
+  if (CurToken().IsOneOf({Token::integer_constant, Token::floating_constant}))
     return ParseNumericLiteral();
   if (CurToken().IsOneOf(
           {Token::coloncolon, Token::identifier, Token::l_paren})) {
@@ -348,6 +349,7 @@ void DILParser::BailOut(const std::string &error, uint32_t loc,
   m_dil_lexer.ResetTokenIdx(m_dil_lexer.NumLexedTokens() - 1);
 }
 
+// FIXME: Remove this once subscript operator uses ScalarLiteralNode.
 // Parse a integer_literal.
 //
 //  integer_literal:
@@ -375,11 +377,15 @@ std::optional<int64_t> DILParser::ParseIntegerConstant() {
 // Parse a numeric_literal.
 //
 //  numeric_literal:
-//    ? Token::numeric_constant ?
+//    ? Token::integer_constant ?
+//    ? Token::floating_constant ?
 //
 ASTNodeUP DILParser::ParseNumericLiteral() {
-  Expect(Token::numeric_constant);
-  ASTNodeUP numeric_constant = ParseNumericConstant();
+  ASTNodeUP numeric_constant;
+  if (CurToken().Is(Token::integer_constant))
+    numeric_constant = ParseIntegerLiteral();
+  else
+    numeric_constant = ParseFloatingPointLiteral();
   if (numeric_constant->GetKind() == NodeKind::eErrorNode) {
     BailOut(llvm::formatv("Failed to parse token as numeric-constant: {0}",
                           CurToken()),
@@ -398,7 +404,7 @@ static constexpr std::pair<const char *, lldb::BasicType> type_suffixes[] = {
     {"l", lldb::eBasicTypeLong},
 };
 
-ASTNodeUP DILParser::ParseNumericConstant() {
+ASTNodeUP DILParser::ParseIntegerLiteral() {
   Token token = CurToken();
   auto spelling = token.GetSpelling();
   llvm::StringRef spelling_ref = spelling;
@@ -418,6 +424,27 @@ ASTNodeUP DILParser::ParseNumericConstant() {
   return std::make_unique<ErrorNode>();
 }
 
+ASTNodeUP DILParser::ParseFloatingPointLiteral() {
+  Token token = CurToken();
+  auto spelling = token.GetSpelling();
+  llvm::StringRef spelling_ref = spelling;
+  spelling_ref = spelling;
+  lldb::BasicType type = lldb::eBasicTypeDouble;
+  llvm::APFloat raw_float(llvm::APFloat::IEEEdouble());
+  if (spelling_ref.consume_back_insensitive("f")) {
+    type = lldb::eBasicTypeFloat;
+    raw_float = llvm::APFloat(llvm::APFloat::IEEEsingle());
+  }
+  auto StatusOrErr = raw_float.convertFromString(
+      spelling_ref, llvm::APFloat::rmNearestTiesToEven);
+  if (!errorToBool(StatusOrErr.takeError())) {
+    Scalar scalar_value(raw_float);
+    return std::make_unique<ScalarLiteralNode>(token.GetLocation(), type,
+                                               scalar_value);
+  }
+  return std::make_unique<ErrorNode>();
+}
+
 void DILParser::Expect(Token::Kind kind) {
   if (CurToken().IsNot(kind)) {
     BailOut(llvm::formatv("expected {0}, got: {1}", kind, CurToken()),
diff --git 
a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py
 
b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py
index c2ab049a28bc2..1d0340160f5e4 100644
--- 
a/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py
+++ 
b/lldb/test/API/commands/frame/var-dil/basics/ArraySubscript/TestFrameVarDILArraySubscript.py
@@ -66,7 +66,7 @@ def test_subscript(self):
         self.expect(
             "frame var 'int_arr[1.0]'",
             error=True,
-            substrs=["expected 'r_square', got: <'.'"],
+            substrs=["failed to parse integer constant: <'1.0' (floating_constant)>"],
         )
 
         # Base should be a "pointer to T" and index should be of an integral type.
diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile 
b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile
new file mode 100644
index 0000000000000..99998b20bcb05
--- /dev/null
+++ b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/Makefile
@@ -0,0 +1,3 @@
+CXX_SOURCES := main.cpp
+
+include Makefile.rules
diff --git 
a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py
 
b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py
new file mode 100644
index 0000000000000..883781873a525
--- /dev/null
+++ 
b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py
@@ -0,0 +1,30 @@
+"""
+Test DIL arithmetic.
+"""
+
+import lldb
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test.decorators import *
+from lldbsuite.test import lldbutil
+
+
+class TestFrameVarDILArithmetic(TestBase):
+    NO_DEBUG_INFO_TESTCASE = True
+
+    def test_arithmetic(self):
+        self.build()
+        lldbutil.run_to_source_breakpoint(
+            self, "Set a breakpoint here", lldb.SBFileSpec("main.cpp")
+        )
+
+        self.runCmd("settings set target.experimental.use-DIL true")
+
+        # Check number parsing
+        self.expect_var_path("1", value="1", type="int")
+        self.expect_var_path("1ull", value="1", type="unsigned long long")
+        self.expect_var_path("0b10", value="2", type="int")
+        self.expect_var_path("010", value="8", type="int")
+        self.expect_var_path("0x10", value="16", type="int")
+        self.expect_var_path("1.0", value="1", type="double")
+        self.expect_var_path("1.0f", value="1", type="float")
+        self.expect_var_path("0x1.2p+3f", value="9", type="float")
diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/main.cpp 
b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/main.cpp
new file mode 100644
index 0000000000000..c9bd8afb0d71d
--- /dev/null
+++ b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/main.cpp
@@ -0,0 +1,3 @@
+int main(int argc, char **argv) {
+  return 0; // Set a breakpoint here
+}
diff --git a/lldb/unittests/ValueObject/DILLexerTests.cpp 
b/lldb/unittests/ValueObject/DILLexerTests.cpp
index f65034c1dbea3..ff5234ac64451 100644
--- a/lldb/unittests/ValueObject/DILLexerTests.cpp
+++ b/lldb/unittests/ValueObject/DILLexerTests.cpp
@@ -151,22 +151,31 @@ TEST(DILLexerTests, IdentifiersTest) {
     Token token = lexer.GetCurrentToken();
     EXPECT_TRUE(token.IsNot(Token::identifier));
     EXPECT_TRUE(token.IsOneOf({Token::eof, Token::coloncolon, Token::l_paren,
-                               Token::r_paren, Token::numeric_constant}));
+                               Token::r_paren, Token::integer_constant}));
   }
 }
 
 TEST(DILLexerTests, NumbersTest) {
   // These strings should lex into number tokens.
-  std::vector<std::string> valid_numbers = {"123", "0x123", "0123", "0b101"};
+  std::vector<std::string> valid_integers = {"123", "0x123", "0123", "0b101"};
+  std::vector<std::string> valid_floats = {
+      "1.2",    ".2",    "2.f",     "0x1.2",    "0x.2",     ".2e1f",
+      "2.e+1f", "0x1.f", "0x1.2p1", "0x1.p-1f", "0x1.2p+3f"};
 
   // The lexer can lex these strings, but they should not be numbers.
-  std::vector<std::string> invalid_numbers = {"", "x123", "b123"};
+  std::vector<std::string> invalid_numbers = {"", "x123", "b123", "a.b"};
 
-  for (auto &str : valid_numbers) {
+  for (auto &str : valid_integers) {
     SCOPED_TRACE(str);
     EXPECT_THAT_EXPECTED(ExtractTokenData(str),
                          llvm::HasValue(testing::ElementsAre(
-                             testing::Pair(Token::numeric_constant, str))));
+                             testing::Pair(Token::integer_constant, str))));
+  }
+  for (auto &str : valid_floats) {
+    SCOPED_TRACE(str);
+    EXPECT_THAT_EXPECTED(ExtractTokenData(str),
+                         llvm::HasValue(testing::ElementsAre(
+                             testing::Pair(Token::floating_constant, str))));
   }
   // Verify that none of the invalid numbers come out as numeric tokens.
   for (auto &str : invalid_numbers) {
@@ -175,7 +184,7 @@ TEST(DILLexerTests, NumbersTest) {
     EXPECT_THAT_EXPECTED(maybe_lexer, llvm::Succeeded());
     DILLexer lexer(*maybe_lexer);
     Token token = lexer.GetCurrentToken();
-    EXPECT_TRUE(token.IsNot(Token::numeric_constant));
+    EXPECT_TRUE(token.IsNot(Token::integer_constant));
     EXPECT_TRUE(token.IsOneOf({Token::eof, Token::identifier}));
   }
 }

>From 27466c3529741d86982e4ebd29764c6812a6c264 Mon Sep 17 00:00:00 2001
From: Ilia Kuklin <ikuk...@accesssoftek.com>
Date: Tue, 5 Aug 2025 21:30:15 +0500
Subject: [PATCH 3/3] Automatically pick integer literal type

---
 lldb/include/lldb/ValueObject/DILAST.h        | 24 ++++-
 lldb/include/lldb/ValueObject/DILEval.h       |  5 +
 lldb/source/ValueObject/DILEval.cpp           | 98 +++++++++++++++++--
 lldb/source/ValueObject/DILParser.cpp         | 43 ++++----
 .../Arithmetic/TestFrameVarDILArithmetic.py   | 19 +++-
 llvm/include/llvm/ADT/StringRef.h             |  2 +
 llvm/lib/Support/StringRef.cpp                |  6 +-
 7 files changed, 153 insertions(+), 44 deletions(-)

diff --git a/lldb/include/lldb/ValueObject/DILAST.h 
b/lldb/include/lldb/ValueObject/DILAST.h
index a174e28ea5c06..a0e5909a8c6a7 100644
--- a/lldb/include/lldb/ValueObject/DILAST.h
+++ b/lldb/include/lldb/ValueObject/DILAST.h
@@ -181,22 +181,36 @@ class BitFieldExtractionNode : public ASTNode {
 
 class ScalarLiteralNode : public ASTNode {
 public:
-  ScalarLiteralNode(uint32_t location, lldb::BasicType type, Scalar value)
-      : ASTNode(location, NodeKind::eScalarLiteralNode), m_type(type),
-        m_value(value) {}
+  ScalarLiteralNode(uint32_t location, Scalar value, uint32_t radix,
+                    bool is_unsigned, bool is_long, bool is_longlong)
+      : ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value),
+        m_radix(radix), m_is_unsigned(is_unsigned), m_is_long(is_long),
+        m_is_longlong(is_longlong) {}
+
+  ScalarLiteralNode(uint32_t location, Scalar value, bool is_float)
+      : ASTNode(location, NodeKind::eScalarLiteralNode), m_value(value),
+        m_is_float(is_float) {}
 
   llvm::Expected<lldb::ValueObjectSP> Accept(Visitor *v) const override;
 
-  lldb::BasicType GetType() const { return m_type; }
   Scalar GetValue() const & { return m_value; }
+  uint32_t GetRadix() const { return m_radix; }
+  bool IsUnsigned() const { return m_is_unsigned; }
+  bool IsLong() const { return m_is_long; }
+  bool IsLongLong() const { return m_is_longlong; }
+  bool IsFloat() const { return m_is_float; }
 
   static bool classof(const ASTNode *node) {
     return node->GetKind() == NodeKind::eScalarLiteralNode;
   }
 
 private:
-  lldb::BasicType m_type;
   Scalar m_value;
+  uint32_t m_radix;
+  bool m_is_unsigned;
+  bool m_is_long;
+  bool m_is_longlong;
+  bool m_is_float;
 };
 
 /// This class contains one Visit method for each specialized type of
diff --git a/lldb/include/lldb/ValueObject/DILEval.h 
b/lldb/include/lldb/ValueObject/DILEval.h
index cb2a81d1c7ba1..22a6c5bd0af9a 100644
--- a/lldb/include/lldb/ValueObject/DILEval.h
+++ b/lldb/include/lldb/ValueObject/DILEval.h
@@ -57,6 +57,11 @@ class Interpreter : Visitor {
   llvm::Expected<lldb::ValueObjectSP>
   Visit(const ScalarLiteralNode *node) override;
 
+  llvm::Expected<CompilerType>
+  PickLiteralType(lldb::TypeSystemSP type_system,
+                  std::shared_ptr<ExecutionContextScope> ctx,
+                  const ScalarLiteralNode *literal);
+
   // Used by the interpreter to create objects, perform casts, etc.
   lldb::TargetSP m_target;
   llvm::StringRef m_expr;
diff --git a/lldb/source/ValueObject/DILEval.cpp 
b/lldb/source/ValueObject/DILEval.cpp
index 18cc30d589829..ee667e9306ca5 100644
--- a/lldb/source/ValueObject/DILEval.cpp
+++ b/lldb/source/ValueObject/DILEval.cpp
@@ -404,8 +404,7 @@ Interpreter::Visit(const BitFieldExtractionNode *node) {
   return child_valobj_sp;
 }
 
-static CompilerType GetBasicTypeFromCU(std::shared_ptr<StackFrame> ctx,
-                                       lldb::BasicType basic_type) {
+static lldb::TypeSystemSP GetTypeSystemFromCU(std::shared_ptr<StackFrame> ctx) {
   SymbolContext symbol_context =
       ctx->GetSymbolContext(lldb::eSymbolContextCompUnit);
   auto language = symbol_context.comp_unit->GetLanguage();
@@ -414,20 +413,103 @@ static CompilerType GetBasicTypeFromCU(std::shared_ptr<StackFrame> ctx,
   auto type_system =
       symbol_context.module_sp->GetTypeSystemForLanguage(language);
 
+  if (type_system)
+    return *type_system;
+
+  return lldb::TypeSystemSP();
+}
+
+static CompilerType GetBasicType(lldb::TypeSystemSP type_system,
+                                 lldb::BasicType basic_type) {
   if (type_system)
     if (auto compiler_type = type_system.get()->GetBasicTypeFromAST(basic_type))
       return compiler_type;
 
-  return CompilerType();
+  CompilerType empty_type;
+  return empty_type;
+}
+
+llvm::Expected<CompilerType>
+Interpreter::PickLiteralType(lldb::TypeSystemSP type_system,
+                             std::shared_ptr<ExecutionContextScope> ctx,
+                             const ScalarLiteralNode *literal) {
+  Scalar scalar = literal->GetValue();
+  if (scalar.GetType() == Scalar::e_float) {
+    if (literal->IsFloat())
+      return GetBasicType(type_system, lldb::eBasicTypeFloat);
+    return GetBasicType(type_system, lldb::eBasicTypeDouble);
+  } else if (scalar.GetType() == Scalar::e_int) {
+    // Binary, Octal, Hexadecimal and literals with a U suffix are allowed to be
+    // an unsigned integer.
+    bool unsigned_is_allowed =
+        literal->IsUnsigned() || literal->GetRadix() != 10;
+
+    // Try int/unsigned int.
+    uint64_t int_byte_size = 0;
+    if (auto temp = GetBasicType(type_system, lldb::eBasicTypeInt)
+                        .GetByteSize(ctx.get()))
+      int_byte_size = *temp;
+    unsigned int_size = int_byte_size * CHAR_BIT;
+    llvm::APInt apint = scalar.GetAPSInt();
+    if (!literal->IsLong() && !literal->IsLongLong() &&
+        apint.isIntN(int_size)) {
+      if (!literal->IsUnsigned() && apint.isIntN(int_size - 1))
+        return GetBasicType(type_system, lldb::eBasicTypeInt);
+      if (unsigned_is_allowed)
+        return GetBasicType(type_system, lldb::eBasicTypeUnsignedInt);
+    }
+    // Try long/unsigned long.
+    uint64_t long_byte_size = 0;
+    if (auto temp = GetBasicType(type_system, lldb::eBasicTypeLong)
+                        .GetByteSize(ctx.get()))
+      long_byte_size = *temp;
+    unsigned long_size = long_byte_size * CHAR_BIT;
+    if (!literal->IsLongLong() && apint.isIntN(long_size)) {
+      if (!literal->IsUnsigned() && apint.isIntN(long_size - 1))
+        return GetBasicType(type_system, lldb::eBasicTypeLong);
+      if (unsigned_is_allowed)
+        return GetBasicType(type_system, lldb::eBasicTypeUnsignedLong);
+    }
+    // Try long long/unsigned long long.
+    uint64_t long_long_byte_size = 0;
+    if (auto temp = GetBasicType(type_system, lldb::eBasicTypeLongLong)
+                        .GetByteSize(ctx.get()))
+      long_long_byte_size = *temp;
+    unsigned long_long_size = long_long_byte_size * CHAR_BIT;
+    if (apint.isIntN(long_long_size)) {
+      if (!literal->IsUnsigned() && apint.isIntN(long_long_size - 1))
+        return GetBasicType(type_system, lldb::eBasicTypeLongLong);
+      // If we still couldn't decide a type, we probably have something that
+      // does not fit in a signed long long, but has no U suffix. Also known as:
+      //
+      //  warning: integer literal is too large to be represented in a signed
+      //  integer type, interpreting as unsigned [-Wimplicitly-unsigned-literal]
+      //
+      return GetBasicType(type_system, lldb::eBasicTypeUnsignedLongLong);
+    }
+    return llvm::make_error<DILDiagnosticError>(
+        m_expr,
+        "integer literal is too large to be represented in any integer type",
+        literal->GetLocation());
+  }
+  return llvm::make_error<DILDiagnosticError>(
+      m_expr, "unable to create a const literal", literal->GetLocation());
 }
 
 llvm::Expected<lldb::ValueObjectSP>
 Interpreter::Visit(const ScalarLiteralNode *node) {
-  CompilerType result_type =
-      GetBasicTypeFromCU(m_exe_ctx_scope, node->GetType());
-  Scalar value = node->GetValue();
-  return ValueObject::CreateValueObjectFromScalar(m_target, value, result_type,
-                                                  "result");
+  auto type_system = GetTypeSystemFromCU(m_exe_ctx_scope);
+  if (type_system) {
+    auto type = PickLiteralType(type_system, m_exe_ctx_scope, node);
+    if (type) {
+      Scalar scalar = node->GetValue();
+      return ValueObject::CreateValueObjectFromScalar(m_target, scalar, *type,
+                                                      "result");
+    } else
+      return type.takeError();
+  }
+  return llvm::make_error<DILDiagnosticError>(
+      m_expr, "unable to create a const literal", node->GetLocation());
 }
 
 } // namespace lldb_private::dil
diff --git a/lldb/source/ValueObject/DILParser.cpp 
b/lldb/source/ValueObject/DILParser.cpp
index eee4a7bc4d363..35eb6d62b7ba4 100644
--- a/lldb/source/ValueObject/DILParser.cpp
+++ b/lldb/source/ValueObject/DILParser.cpp
@@ -396,30 +396,26 @@ ASTNodeUP DILParser::ParseNumericLiteral() {
   return numeric_constant;
 }
 
-static constexpr std::pair<const char *, lldb::BasicType> type_suffixes[] = {
-    {"ull", lldb::eBasicTypeUnsignedLongLong},
-    {"ul", lldb::eBasicTypeUnsignedLong},
-    {"u", lldb::eBasicTypeUnsignedInt},
-    {"ll", lldb::eBasicTypeLongLong},
-    {"l", lldb::eBasicTypeLong},
-};
-
 ASTNodeUP DILParser::ParseIntegerLiteral() {
   Token token = CurToken();
   auto spelling = token.GetSpelling();
   llvm::StringRef spelling_ref = spelling;
-  lldb::BasicType type = lldb::eBasicTypeInt;
-  for (auto [suffix, t] : type_suffixes) {
-    if (spelling_ref.consume_back_insensitive(suffix)) {
-      type = t;
-      break;
-    }
-  }
+
+  auto radix = llvm::getAutoSenseRadix(spelling_ref);
+  bool is_unsigned = false, is_long = false, is_longlong = false;
+  if (spelling_ref.consume_back_insensitive("ll"))
+    is_longlong = true;
+  if (spelling_ref.consume_back_insensitive("l"))
+    is_long = true;
+  if (spelling_ref.consume_back_insensitive("u"))
+    is_unsigned = true;
+
   llvm::APInt raw_value;
-  if (!spelling_ref.getAsInteger(0, raw_value)) {
+  if (!spelling_ref.getAsInteger(radix, raw_value)) {
     Scalar scalar_value(raw_value);
-    return std::make_unique<ScalarLiteralNode>(token.GetLocation(), type,
-                                               scalar_value);
+    return std::make_unique<ScalarLiteralNode>(token.GetLocation(),
+                                               scalar_value, radix, is_unsigned,
+                                               is_long, is_longlong);
   }
   return std::make_unique<ErrorNode>();
 }
@@ -428,19 +424,20 @@ ASTNodeUP DILParser::ParseFloatingPointLiteral() {
   Token token = CurToken();
   auto spelling = token.GetSpelling();
   llvm::StringRef spelling_ref = spelling;
-  spelling_ref = spelling;
-  lldb::BasicType type = lldb::eBasicTypeDouble;
+
+  bool is_float = false;
   llvm::APFloat raw_float(llvm::APFloat::IEEEdouble());
   if (spelling_ref.consume_back_insensitive("f")) {
-    type = lldb::eBasicTypeFloat;
+    is_float = true;
     raw_float = llvm::APFloat(llvm::APFloat::IEEEsingle());
   }
+
   auto StatusOrErr = raw_float.convertFromString(
       spelling_ref, llvm::APFloat::rmNearestTiesToEven);
   if (!errorToBool(StatusOrErr.takeError())) {
     Scalar scalar_value(raw_float);
-    return std::make_unique<ScalarLiteralNode>(token.GetLocation(), type,
-                                               scalar_value);
+    return std::make_unique<ScalarLiteralNode>(token.GetLocation(),
+                                               scalar_value, is_float);
   }
   return std::make_unique<ErrorNode>();
 }
diff --git a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py
index 883781873a525..57a636ebb0829 100644
--- a/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py
+++ b/lldb/test/API/commands/frame/var-dil/expr/Arithmetic/TestFrameVarDILArithmetic.py
@@ -20,11 +20,20 @@ def test_arithmetic(self):
         self.runCmd("settings set target.experimental.use-DIL true")
 
         # Check number parsing
-        self.expect_var_path("1", value="1", type="int")
-        self.expect_var_path("1ull", value="1", type="unsigned long long")
-        self.expect_var_path("0b10", value="2", type="int")
-        self.expect_var_path("010", value="8", type="int")
-        self.expect_var_path("0x10", value="16", type="int")
         self.expect_var_path("1.0", value="1", type="double")
         self.expect_var_path("1.0f", value="1", type="float")
         self.expect_var_path("0x1.2p+3f", value="9", type="float")
+        self.expect_var_path("1", value="1", type="int")
+        self.expect_var_path("1u", value="1", type="unsigned int")
+        self.expect_var_path("0b1l", value="1", type="long")
+        self.expect_var_path("01ul", value="1", type="unsigned long")
+        self.expect_var_path("0o1ll", value="1", type="long long")
+        self.expect_var_path("0x1ULL", value="1", type="unsigned long long")
+        self.expect_var_path("0xFFFFFFFFFFFFFFFF", value="18446744073709551615")
+        self.expect(
+            "frame var '0xFFFFFFFFFFFFFFFFF'",
+            error=True,
+            substrs=[
+                "integer literal is too large to be represented in any integer type"
+            ],
+        )
diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index 0ced1c0379a3b..16aca4d45892d 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -38,6 +38,8 @@ namespace llvm {
   LLVM_ABI bool getAsSignedInteger(StringRef Str, unsigned Radix,
                                    long long &Result);
 
+  LLVM_ABI unsigned getAutoSenseRadix(StringRef &Str);
+
   LLVM_ABI bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
                                        unsigned long long &Result);
   LLVM_ABI bool consumeSignedInteger(StringRef &Str, unsigned Radix,
diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp
index dc758785e40d5..b6a2f8aeadccf 100644
--- a/llvm/lib/Support/StringRef.cpp
+++ b/llvm/lib/Support/StringRef.cpp
@@ -385,7 +385,7 @@ size_t StringRef::count(StringRef Str) const {
   return Count;
 }
 
-static unsigned GetAutoSenseRadix(StringRef &Str) {
+unsigned llvm::getAutoSenseRadix(StringRef &Str) {
   if (Str.empty())
     return 10;
 
@@ -410,7 +410,7 @@ bool llvm::consumeUnsignedInteger(StringRef &Str, unsigned Radix,
                                   unsigned long long &Result) {
   // Autosense radix if not specified.
   if (Radix == 0)
-    Radix = GetAutoSenseRadix(Str);
+    Radix = getAutoSenseRadix(Str);
 
   // Empty strings (after the radix autosense) are invalid.
   if (Str.empty()) return true;
@@ -509,7 +509,7 @@ bool StringRef::consumeInteger(unsigned Radix, APInt &Result) {
 
   // Autosense radix if not specified.
   if (Radix == 0)
-    Radix = GetAutoSenseRadix(Str);
+    Radix = getAutoSenseRadix(Str);
 
   assert(Radix > 1 && Radix <= 36);
 

_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

[Lldb-commits] [lldb] [llvm] [LLDB] Add `ScalarLiteralNode` and literal parsing in DIL (PR #152308)

Reply via email to