[Lldb-commits] [lldb] [lldb-dap] Implement `runInTerminal` for Windows (PR #121269)

2025-01-19 Thread Hu Jialun via lldb-commits

https://github.com/SuibianP updated 
https://github.com/llvm/llvm-project/pull/121269

>From c32fb82903243e59d6a33e4e38a4da6267981ea6 Mon Sep 17 00:00:00 2001
From: Hu Jialun 
Date: Sat, 28 Dec 2024 22:39:33 +0800
Subject: [PATCH] [lldb-dap] Implement runInTerminal for Windows

Currently, the named pipe is passed by name and a transient ofstream is
constructed at each I/O request. This assumes,
  - Blocking semantics: FIFO I/O waits for the other side to connect.
  - Buffered semantics: Closing one side does not discard existing data.

The former can be replaced by WaitNamedPipe/ConnectNamedPipe on Win32,
but the second cannot be easily worked around. It is also impossible to
have another "keep-alive" pipe server instance, as server-client pairs
are fixed on connection on Win32 and the client may get connected to it
instead of the real one.

Refactor FifoFile[IO] to use open file handles rather than file names.

---

Win32 provides no way to replace the process image. Under the hood exec*
actually creates a new process with a new PID. DebugActiveProcess also
cannot get notified of process creations.

Create the new process in a suspended state and resume it after attach.
---
 lldb/tools/lldb-dap/FifoFiles.cpp | 120 ++
 lldb/tools/lldb-dap/FifoFiles.h   |  35 
 lldb/tools/lldb-dap/RunInTerminal.cpp |  35 ++--
 lldb/tools/lldb-dap/RunInTerminal.h   |   6 +-
 lldb/tools/lldb-dap/lldb-dap.cpp  |  48 ---
 5 files changed, 189 insertions(+), 55 deletions(-)

diff --git a/lldb/tools/lldb-dap/FifoFiles.cpp 
b/lldb/tools/lldb-dap/FifoFiles.cpp
index 1f1bba80bd3b11..ac6a7cbee8f8cd 100644
--- a/lldb/tools/lldb-dap/FifoFiles.cpp
+++ b/lldb/tools/lldb-dap/FifoFiles.cpp
@@ -9,7 +9,13 @@
 #include "FifoFiles.h"
 #include "JSONUtils.h"
 
-#if !defined(_WIN32)
+#include "llvm/Support/FileSystem.h"
+
+#if defined(_WIN32)
+#include 
+#include 
+#include 
+#else
 #include 
 #include 
 #include 
@@ -24,27 +30,74 @@ using namespace llvm;
 
 namespace lldb_dap {
 
-FifoFile::FifoFile(StringRef path) : m_path(path) {}
+std::error_code EC;
 
+FifoFile::FifoFile(StringRef path)
+: m_path(path), m_file(fopen(path.data(), "r+")) {
+  if (m_file == nullptr) {
+EC = std::error_code(errno, std::generic_category());
+llvm::errs() << "Failed to open fifo file: " << path << EC.message()
+ << "\n";
+std::terminate();
+  }
+  if (setvbuf(m_file, NULL, _IONBF, 0))
+llvm::errs() << "Error setting unbuffered mode on C FILE\n";
+}
+FifoFile::FifoFile(StringRef path, FILE *f) : m_path(path), m_file(f) {}
+FifoFile::FifoFile(FifoFile &&other)
+: m_path(other.m_path), m_file(other.m_file) {
+  other.m_file = nullptr;
+}
 FifoFile::~FifoFile() {
+  if (m_file)
+fclose(m_file);
 #if !defined(_WIN32)
+  // Unreferenced named pipes are deleted automatically on Win32
   unlink(m_path.c_str());
 #endif
 }
 
-Expected> CreateFifoFile(StringRef path) {
-#if defined(_WIN32)
-  return createStringError(inconvertibleErrorCode(), "Unimplemented");
+// This probably belongs to llvm::sys::fs as another FSEntity type
+std::error_code createNamedPipe(const Twine &Prefix, StringRef Suffix,
+int &ResultFd,
+SmallVectorImpl &ResultPath) {
+  const char *Middle = Suffix.empty() ? "-%%" : "-%%.";
+  auto EC = sys::fs::getPotentiallyUniqueFileName(
+#ifdef _WIN32
+  ".\\pipe\\LOCAL\\"
+#else
+  "/tmp/"
+#endif
+  + Prefix + Middle + Suffix,
+  ResultPath);
+  if (EC)
+return EC;
+  ResultPath.push_back(0);
+  const char *path = ResultPath.data();
+#ifdef _WIN32
+  HANDLE h = ::CreateNamedPipeA(
+  path, PIPE_ACCESS_DUPLEX | FILE_FLAG_FIRST_PIPE_INSTANCE,
+  PIPE_TYPE_BYTE | PIPE_READMODE_BYTE | PIPE_WAIT, 1, 1024, 1024, 0, NULL);
+  if (h == INVALID_HANDLE_VALUE)
+return std::error_code(::GetLastError(), std::system_category());
+  ResultFd = _open_osfhandle((intptr_t)h, _O_TEXT | _O_RDWR);
+  if (ResultFd == -1)
+return std::error_code(::GetLastError(), std::system_category());
 #else
-  if (int err = mkfifo(path.data(), 0600))
-return createStringError(std::error_code(err, std::generic_category()),
- "Couldn't create fifo file: %s", path.data());
-  return std::make_shared(path);
+  if (mkfifo(path, 0600) == -1)
+return std::error_code(errno, std::generic_category());
+  EC = openFileForWrite(ResultPath, ResultFd, sys::fs::CD_OpenExisting,
+sys::fs::OF_None, 0600);
+  if (EC)
+return EC;
 #endif
+  ResultPath.pop_back();
+  return std::error_code();
 }
 
-FifoFileIO::FifoFileIO(StringRef fifo_file, StringRef other_endpoint_name)
-: m_fifo_file(fifo_file), m_other_endpoint_name(other_endpoint_name) {}
+FifoFileIO::FifoFileIO(FifoFile &&fifo_file, StringRef other_endpoint_name)
+: m_fifo_file(std::move(fifo_file)),
+  m_other_endpoint_name(other_endpoint_name) {}
 
 Expected FifoF

[Lldb-commits] [lldb] [lldb] do not show misleading error when there is no frame (PR #119103)

2025-01-19 Thread via lldb-commits

oltolm wrote:

ping

https://github.com/llvm/llvm-project/pull/119103
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [lldb] [LLDB] Add Lexer (with tests) for DIL (Data Inspection Language). (PR #123521)

2025-01-19 Thread Andy Hippo via lldb-commits


@@ -0,0 +1,205 @@
+//===-- DILLexer.cpp 
--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// This implements the recursive descent parser for the Data Inspection
+// Language (DIL), and its helper functions, which will eventually underlie the
+// 'frame variable' command. The language that this parser recognizes is
+// described in lldb/docs/dil-expr-lang.ebnf
+//
+//===--===//
+
+#include "lldb/ValueObject/DILLexer.h"
+#include "llvm/ADT/StringMap.h"
+
+namespace lldb_private {
+
+namespace dil {
+
+// For fast keyword lookup. More keywords will be added later.
+const llvm::StringMap Keywords = {
+{"namespace", dil::TokenKind::kw_namespace},
+};
+
+const std::string DILToken::getTokenName(dil::TokenKind kind) {
+  switch (kind) {
+  case dil::TokenKind::coloncolon:
+return "coloncolon";
+  case dil::TokenKind::eof:
+return "eof";
+  case dil::TokenKind::identifier:
+return "identifier";
+  case dil::TokenKind::kw_namespace:
+return "namespace";
+  case dil::TokenKind::l_paren:
+return "l_paren";
+  case dil::TokenKind::r_paren:
+return "r_paren";
+  case dil::TokenKind::unknown:
+return "unknown";
+  default:
+return "token_name";
+  }
+}
+
+static bool Is_Letter(char c) {
+  if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
+return true;
+  return false;
+}
+
+static bool Is_Digit(char c) { return ('0' <= c && c <= '9'); }
+
+// A word starts with a letter, underscore, or dollar sign, followed by
+// letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or  underscores.
+bool DILLexer::Is_Word(std::string::iterator start, uint32_t &length) {
+  bool done = false;
+  bool dollar_start = false;
+
+  // Must not start with a digit.
+  if (m_cur_pos == m_expr.end() || Is_Digit(*m_cur_pos))
+return false;
+
+  // First character *may* be a '$', for a register name or convenience
+  // variable.
+  if (*m_cur_pos == '$') {
+dollar_start = true;
+++m_cur_pos;
+length++;
+  }
+
+  // Contains only letters, digits or underscores
+  for (; m_cur_pos != m_expr.end() && !done; ++m_cur_pos) {
+char c = *m_cur_pos;
+if (!Is_Letter(c) && !Is_Digit(c) && c != '_') {
+  done = true;
+  break;
+} else
+  length++;
+  }
+
+  if (dollar_start && length > 1) // Must have something besides just '$'
+return true;
+
+  if (!dollar_start && length > 0)
+return true;
+
+  // Not a valid word, so re-set the lexing position.
+  m_cur_pos = start;
+  return false;
+}
+
+void DILLexer::UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind,
+ std::string tok_str, uint32_t tok_pos) {
+  DILToken new_token;
+  result.setValues(tok_kind, tok_str, tok_pos);
+  new_token = result;
+  m_lexed_tokens.push_back(std::move(new_token));
+}
+
+bool DILLexer::Lex(DILToken &result, bool look_ahead) {
+  bool retval = true;
+
+  if (!look_ahead) {
+// We're being asked for the 'next' token, and not a part of a LookAhead.
+// Check to see if we've already lexed it and pushed it onto our tokens
+// vector; if so, return the next token from the vector, rather than doing
+// more lexing.
+if ((m_tokens_idx != UINT_MAX) &&
+(m_tokens_idx < m_lexed_tokens.size() - 1)) {
+  result = m_lexed_tokens[m_tokens_idx + 1];
+  return retval;
+}
+  }
+
+  // Skip over whitespace (spaces).
+  while (m_cur_pos != m_expr.end() && *m_cur_pos == ' ')
+m_cur_pos++;
+
+  // Check to see if we've reached the end of our input string.
+  if (m_cur_pos == m_expr.end()) {
+UpdateLexedTokens(result, dil::TokenKind::eof, "", m_expr.length());
+return retval;
+  }
+
+  uint32_t position = m_cur_pos - m_expr.begin();
+  ;
+  std::string::iterator start = m_cur_pos;
+  uint32_t length = 0;
+  if (Is_Word(start, length)) {
+dil::TokenKind kind;
+std::string word = m_expr.substr(position, length);
+auto iter = Keywords.find(word);
+if (iter != Keywords.end())
+  kind = iter->second;
+else
+  kind = dil::TokenKind::identifier;
+
+UpdateLexedTokens(result, kind, word, position);
+return true;
+  }
+
+  switch (*m_cur_pos) {
+  case '(':
+m_cur_pos++;
+UpdateLexedTokens(result, dil::TokenKind::l_paren, "(", position);
+return true;
+  case ')':
+m_cur_pos++;
+UpdateLexedTokens(result, dil::TokenKind::r_paren, ")", position);
+return true;
+  case ':':
+if (position + 1 < m_expr.size() && m_expr[position + 1] == ':') {
+  m_cur_pos += 2;
+  UpdateLexedTokens(result, dil::TokenKind::coloncolon, "::", position);
+  return true;
+}
+break;
+  default:
+break;
+  }
+  // Empty Token
+  result.setValues(dil::TokenKind::none,

[Lldb-commits] [lldb] [LLDB] Add Lexer (with tests) for DIL (Data Inspection Language). (PR #123521)

2025-01-19 Thread Andy Hippo via lldb-commits


@@ -0,0 +1,205 @@
+//===-- DILLexer.cpp 
--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// This implements the recursive descent parser for the Data Inspection
+// Language (DIL), and its helper functions, which will eventually underlie the
+// 'frame variable' command. The language that this parser recognizes is
+// described in lldb/docs/dil-expr-lang.ebnf
+//
+//===--===//
+
+#include "lldb/ValueObject/DILLexer.h"
+#include "llvm/ADT/StringMap.h"
+
+namespace lldb_private {
+
+namespace dil {
+
+// For fast keyword lookup. More keywords will be added later.
+const llvm::StringMap Keywords = {
+{"namespace", dil::TokenKind::kw_namespace},
+};
+
+const std::string DILToken::getTokenName(dil::TokenKind kind) {
+  switch (kind) {
+  case dil::TokenKind::coloncolon:
+return "coloncolon";
+  case dil::TokenKind::eof:
+return "eof";
+  case dil::TokenKind::identifier:
+return "identifier";
+  case dil::TokenKind::kw_namespace:
+return "namespace";
+  case dil::TokenKind::l_paren:
+return "l_paren";
+  case dil::TokenKind::r_paren:
+return "r_paren";
+  case dil::TokenKind::unknown:
+return "unknown";
+  default:
+return "token_name";
+  }
+}
+
+static bool Is_Letter(char c) {
+  if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
+return true;
+  return false;
+}
+
+static bool Is_Digit(char c) { return ('0' <= c && c <= '9'); }
+
+// A word starts with a letter, underscore, or dollar sign, followed by
+// letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or  underscores.
+bool DILLexer::Is_Word(std::string::iterator start, uint32_t &length) {
+  bool done = false;
+  bool dollar_start = false;
+
+  // Must not start with a digit.
+  if (m_cur_pos == m_expr.end() || Is_Digit(*m_cur_pos))
+return false;
+
+  // First character *may* be a '$', for a register name or convenience
+  // variable.
+  if (*m_cur_pos == '$') {
+dollar_start = true;
+++m_cur_pos;
+length++;
+  }
+
+  // Contains only letters, digits or underscores
+  for (; m_cur_pos != m_expr.end() && !done; ++m_cur_pos) {
+char c = *m_cur_pos;
+if (!Is_Letter(c) && !Is_Digit(c) && c != '_') {
+  done = true;
+  break;
+} else
+  length++;
+  }
+
+  if (dollar_start && length > 1) // Must have something besides just '$'
+return true;
+
+  if (!dollar_start && length > 0)
+return true;
+
+  // Not a valid word, so re-set the lexing position.
+  m_cur_pos = start;
+  return false;
+}
+
+void DILLexer::UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind,
+ std::string tok_str, uint32_t tok_pos) {
+  DILToken new_token;
+  result.setValues(tok_kind, tok_str, tok_pos);
+  new_token = result;
+  m_lexed_tokens.push_back(std::move(new_token));
+}
+
+bool DILLexer::Lex(DILToken &result, bool look_ahead) {
+  bool retval = true;
+
+  if (!look_ahead) {
+// We're being asked for the 'next' token, and not a part of a LookAhead.
+// Check to see if we've already lexed it and pushed it onto our tokens
+// vector; if so, return the next token from the vector, rather than doing
+// more lexing.
+if ((m_tokens_idx != UINT_MAX) &&
+(m_tokens_idx < m_lexed_tokens.size() - 1)) {
+  result = m_lexed_tokens[m_tokens_idx + 1];
+  return retval;
+}
+  }
+
+  // Skip over whitespace (spaces).
+  while (m_cur_pos != m_expr.end() && *m_cur_pos == ' ')
+m_cur_pos++;
+
+  // Check to see if we've reached the end of our input string.
+  if (m_cur_pos == m_expr.end()) {
+UpdateLexedTokens(result, dil::TokenKind::eof, "", m_expr.length());
+return retval;
+  }
+
+  uint32_t position = m_cur_pos - m_expr.begin();
+  ;

werat wrote:

```suggestion
```

https://github.com/llvm/llvm-project/pull/123521
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [lldb] [LLDB] Add Lexer (with tests) for DIL (Data Inspection Language). (PR #123521)

2025-01-19 Thread Andy Hippo via lldb-commits


@@ -0,0 +1,156 @@
+//===-- DILLexer.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLDB_VALUEOBJECT_DILLEXER_H_
+#define LLDB_VALUEOBJECT_DILLEXER_H_
+
+#include "llvm/ADT/StringRef.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace lldb_private {
+
+namespace dil {
+
+enum class TokenKind {
+  coloncolon,
+  eof,
+  identifier,
+  invalid,
+  kw_namespace,
+  l_paren,
+  none,
+  r_paren,
+  unknown,
+};
+
+/// Class defining the tokens generated by the DIL lexer and used by the
+/// DIL parser.
+class DILToken {
+public:
+  DILToken(dil::TokenKind kind, std::string spelling, uint32_t start)
+  : m_kind(kind), m_spelling(spelling), m_start_pos(start) {}
+
+  DILToken() : m_kind(dil::TokenKind::none), m_spelling(""), m_start_pos(0) {}
+
+  void setKind(dil::TokenKind kind) { m_kind = kind; }
+  dil::TokenKind getKind() const { return m_kind; }
+
+  std::string getSpelling() const { return m_spelling; }
+
+  uint32_t getLength() const { return m_spelling.size(); }
+
+  bool is(dil::TokenKind kind) const { return m_kind == kind; }
+
+  bool isNot(dil::TokenKind kind) const { return m_kind != kind; }
+
+  bool isOneOf(dil::TokenKind kind1, dil::TokenKind kind2) const {
+return is(kind1) || is(kind2);
+  }
+
+  template  bool isOneOf(dil::TokenKind kind, Ts... Ks) const {
+return is(kind) || isOneOf(Ks...);
+  }
+
+  uint32_t getLocation() const { return m_start_pos; }
+
+  void setValues(dil::TokenKind kind, std::string spelling, uint32_t start) {
+m_kind = kind;
+m_spelling = spelling;
+m_start_pos = start;
+  }
+
+  static const std::string getTokenName(dil::TokenKind kind);
+
+private:
+  dil::TokenKind m_kind;
+  std::string m_spelling;
+  uint32_t m_start_pos; // within entire expression string
+};
+
+/// Class for doing the simple lexing required by DIL.
+class DILLexer {
+public:
+  DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr.str()) {
+m_cur_pos = m_expr.begin();
+// Use UINT_MAX to indicate invalid/uninitialized value.
+m_tokens_idx = UINT_MAX;
+  }
+
+  bool Lex(DILToken &result, bool look_ahead = false);
+
+  bool Is_Word(std::string::iterator start, uint32_t &length);
+
+  uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); }
+
+  /// Update 'result' with the other parameter values, create a
+  /// duplicate token, and push the duplicate token onto the vector of
+  /// lexed tokens.
+  void UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind,
+ std::string tok_str, uint32_t tok_pos);
+
+  /// Return the lexed token N+1 positions ahead of the 'current' token
+  /// being handled by the DIL parser.
+  const DILToken &LookAhead(uint32_t N);
+
+  const DILToken &AcceptLookAhead(uint32_t N);

werat wrote:

I think this API might be simpler. The lexer doesn't actually need to re-lex, 
the results will always be the same. We only need to rollback occasionally, but 
we'll always be processing the same sequence of tokens the second time.

So the lexer can always add the tokens to the `m_lexed_tokens` vector and we 
only need `GetCurrentTokenIdx()` and `ResetTokenIdx()` to do the rollback.

https://github.com/llvm/llvm-project/pull/123521
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [lldb] [LLDB] Add Lexer (with tests) for DIL (Data Inspection Language). (PR #123521)

2025-01-19 Thread Andy Hippo via lldb-commits


@@ -0,0 +1,193 @@
+//===-- DILLexerTests.cpp ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "lldb/ValueObject/DILLexer.h"
+#include "llvm/ADT/StringRef.h"
+#include "gtest/gtest.h"
+#include 
+
+using llvm::StringRef;
+
+TEST(DILLexerTests, SimpleTest) {
+  StringRef dil_input_expr("simple_var");
+  uint32_t tok_len = 10;
+  lldb_private::dil::DILLexer dil_lexer(dil_input_expr);

werat wrote:

IMO can drop `dil_` here. All these tests are in the context of DIL, so it's 
clear what it's all about.

```suggestion
  lldb_private::dil::DILLexer lexer(input_expr);
```

https://github.com/llvm/llvm-project/pull/123521
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [lldb] [LLDB] Add Lexer (with tests) for DIL (Data Inspection Language). (PR #123521)

2025-01-19 Thread Andy Hippo via lldb-commits


@@ -0,0 +1,156 @@
+//===-- DILLexer.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLDB_VALUEOBJECT_DILLEXER_H_
+#define LLDB_VALUEOBJECT_DILLEXER_H_
+
+#include "llvm/ADT/StringRef.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace lldb_private {
+
+namespace dil {
+
+enum class TokenKind {
+  coloncolon,
+  eof,
+  identifier,
+  invalid,
+  kw_namespace,
+  l_paren,
+  none,
+  r_paren,
+  unknown,
+};
+
+/// Class defining the tokens generated by the DIL lexer and used by the
+/// DIL parser.
+class DILToken {
+public:
+  DILToken(dil::TokenKind kind, std::string spelling, uint32_t start)
+  : m_kind(kind), m_spelling(spelling), m_start_pos(start) {}
+
+  DILToken() : m_kind(dil::TokenKind::none), m_spelling(""), m_start_pos(0) {}
+
+  void setKind(dil::TokenKind kind) { m_kind = kind; }
+  dil::TokenKind getKind() const { return m_kind; }
+
+  std::string getSpelling() const { return m_spelling; }
+
+  uint32_t getLength() const { return m_spelling.size(); }
+
+  bool is(dil::TokenKind kind) const { return m_kind == kind; }
+
+  bool isNot(dil::TokenKind kind) const { return m_kind != kind; }
+
+  bool isOneOf(dil::TokenKind kind1, dil::TokenKind kind2) const {
+return is(kind1) || is(kind2);
+  }
+
+  template  bool isOneOf(dil::TokenKind kind, Ts... Ks) const {
+return is(kind) || isOneOf(Ks...);
+  }
+
+  uint32_t getLocation() const { return m_start_pos; }
+
+  void setValues(dil::TokenKind kind, std::string spelling, uint32_t start) {
+m_kind = kind;
+m_spelling = spelling;
+m_start_pos = start;
+  }
+
+  static const std::string getTokenName(dil::TokenKind kind);
+
+private:
+  dil::TokenKind m_kind;
+  std::string m_spelling;
+  uint32_t m_start_pos; // within entire expression string
+};
+
+/// Class for doing the simple lexing required by DIL.
+class DILLexer {
+public:
+  DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr.str()) {
+m_cur_pos = m_expr.begin();
+// Use UINT_MAX to indicate invalid/uninitialized value.
+m_tokens_idx = UINT_MAX;
+  }
+
+  bool Lex(DILToken &result, bool look_ahead = false);
+
+  bool Is_Word(std::string::iterator start, uint32_t &length);
+
+  uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); }
+
+  /// Update 'result' with the other parameter values, create a
+  /// duplicate token, and push the duplicate token onto the vector of
+  /// lexed tokens.
+  void UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind,
+ std::string tok_str, uint32_t tok_pos);
+
+  /// Return the lexed token N+1 positions ahead of the 'current' token
+  /// being handled by the DIL parser.
+  const DILToken &LookAhead(uint32_t N);
+
+  const DILToken &AcceptLookAhead(uint32_t N);
+
+  /// Return the index for the 'current' token being handled by the DIL parser.
+  uint32_t GetCurrentTokenIdx() { return m_tokens_idx; }
+
+  /// Return the current token to be handled by the DIL parser.
+  DILToken &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; }
+
+  /// Update the index for the 'current' token, to point to the next lexed
+  /// token.
+  bool IncrementTokenIdx() {

werat wrote:

Shouldn't `Lex()` do this automatically?

https://github.com/llvm/llvm-project/pull/123521
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [lldb] [LLDB] Add Lexer (with tests) for DIL (Data Inspection Language). (PR #123521)

2025-01-19 Thread Andy Hippo via lldb-commits


@@ -0,0 +1,156 @@
+//===-- DILLexer.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLDB_VALUEOBJECT_DILLEXER_H_
+#define LLDB_VALUEOBJECT_DILLEXER_H_
+
+#include "llvm/ADT/StringRef.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace lldb_private {
+
+namespace dil {
+
+enum class TokenKind {
+  coloncolon,
+  eof,
+  identifier,
+  invalid,
+  kw_namespace,
+  l_paren,
+  none,
+  r_paren,
+  unknown,
+};
+
+/// Class defining the tokens generated by the DIL lexer and used by the
+/// DIL parser.
+class DILToken {

werat wrote:

Since we already have a `dil::` namespace, maybe we can drop the `DIL` prefix?

https://github.com/llvm/llvm-project/pull/123521
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [lldb] [LLDB] Add Lexer (with tests) for DIL (Data Inspection Language). (PR #123521)

2025-01-19 Thread Andy Hippo via lldb-commits


@@ -0,0 +1,156 @@
+//===-- DILLexer.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLDB_VALUEOBJECT_DILLEXER_H_
+#define LLDB_VALUEOBJECT_DILLEXER_H_
+
+#include "llvm/ADT/StringRef.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace lldb_private {
+
+namespace dil {
+
+enum class TokenKind {
+  coloncolon,
+  eof,
+  identifier,
+  invalid,
+  kw_namespace,
+  l_paren,
+  none,
+  r_paren,
+  unknown,
+};
+
+/// Class defining the tokens generated by the DIL lexer and used by the
+/// DIL parser.
+class DILToken {
+public:
+  DILToken(dil::TokenKind kind, std::string spelling, uint32_t start)
+  : m_kind(kind), m_spelling(spelling), m_start_pos(start) {}
+
+  DILToken() : m_kind(dil::TokenKind::none), m_spelling(""), m_start_pos(0) {}
+
+  void setKind(dil::TokenKind kind) { m_kind = kind; }
+  dil::TokenKind getKind() const { return m_kind; }
+
+  std::string getSpelling() const { return m_spelling; }
+
+  uint32_t getLength() const { return m_spelling.size(); }
+
+  bool is(dil::TokenKind kind) const { return m_kind == kind; }
+
+  bool isNot(dil::TokenKind kind) const { return m_kind != kind; }
+
+  bool isOneOf(dil::TokenKind kind1, dil::TokenKind kind2) const {
+return is(kind1) || is(kind2);
+  }
+
+  template  bool isOneOf(dil::TokenKind kind, Ts... Ks) const {
+return is(kind) || isOneOf(Ks...);
+  }
+
+  uint32_t getLocation() const { return m_start_pos; }
+
+  void setValues(dil::TokenKind kind, std::string spelling, uint32_t start) {
+m_kind = kind;
+m_spelling = spelling;
+m_start_pos = start;
+  }
+
+  static const std::string getTokenName(dil::TokenKind kind);
+
+private:
+  dil::TokenKind m_kind;
+  std::string m_spelling;
+  uint32_t m_start_pos; // within entire expression string
+};
+
+/// Class for doing the simple lexing required by DIL.
+class DILLexer {
+public:
+  DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr.str()) {

werat wrote:

I think if we accept `StringRef` as input, we shouldn't copy the data and work 
with the provided string view (and assume we don't outlive it).
If you need the lexer to own the text (and you probably don't), then accept 
`std::string` and move from it.

https://github.com/llvm/llvm-project/pull/123521
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [clang] [lldb] [llvm] Patch series to reapply #118734 and substantially improve it (PR #120534)

2025-01-19 Thread via lldb-commits

dyung wrote:

> @dyung - OK, I think the current just-pushed version of this PR is worth 
> another test.
> 
> I've taught the TableGen string table emission to go back to working around 
> the MSVC issues using a different table form that we used to use in LLVM when 
> MSVC had a reliable error on it. It appears that now it only sometimes gets 
> miscompiled, and so this workaround got accidentally removed without really 
> being in a reliable place.
> 
> But that still requires that all of the large string tables go through 
> TableGen to get emitted in this form. So I've sent two PRs (separately, they 
> seem independently good) to move NVPTX and Hexagon to TableGen, and this PR 
> contains patches to move more of the ARM builtins to TableGen.
> 
> I _think_ this gets all of the ones that produced too-large string tables, 
> but testing should show. Let me know. If this works and there isn't a really 
> short path to upgrade, I'll start pulling some of the pre-requisite PRs out 
> for independent review, and then update this PR once I'm down to the minimal 
> chain of PRs that do the full conversion of builtins to string tables.
> 
> Thanks, -Chandler

Some good news, everything seems to pass after your latest changes in this PR! 
I didn't believe it at first and did a clean rebuild and test to verify. In the 
end everything passed again.

That being said, I am working on deploying an updated version of VS2019 to our 
internal builders. I have gotten agreement with the plan, the only possible 
hold-up is that we would need to rebuild our internal builders which likely may 
not get done for a few weeks. I can apply quick work-arounds to the machines, 
but they ultimately will need to be rebuilt.

Would you prefer to move forward with the solution currently implemented in 
this PR or for us to update our internal builders?

https://github.com/llvm/llvm-project/pull/120534
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [lldb] [LLDB] Add Lexer (with tests) for DIL (Data Inspection Language). (PR #123521)

2025-01-19 Thread via lldb-commits

https://github.com/cmtice created 
https://github.com/llvm/llvm-project/pull/123521

This adds the basic lexer, with unittests, for the Data Inspection Language 
(DIL) -- see
https://discourse.llvm.org/t/rfc-data-inspection-language/69893

This version of the lexer only handles local variables and namespaces, and is 
designed to work  with
https://github.com/llvm/llvm-project/pull/120971.

>From 468f73f8539dcb8addf8ed9618d9eb797dabbb01 Mon Sep 17 00:00:00 2001
From: Caroline Tice 
Date: Sun, 19 Jan 2025 09:15:34 -0800
Subject: [PATCH] [LLDB] Add Lexer (with tests) for DIL (Data Inspection
 Language).

This adds the basic lexer, with unittests, for the Data Inspection
Language (DIL) -- see
https://discourse.llvm.org/t/rfc-data-inspection-language/69893

This version of the lexer only handles local variables and namespaces,
and is designed to work  with
https://github.com/llvm/llvm-project/pull/120971.
---
 lldb/include/lldb/ValueObject/DILLexer.h | 156 ++
 lldb/source/ValueObject/DILLexer.cpp | 205 +++
 lldb/unittests/ValueObject/CMakeLists.txt|   1 +
 lldb/unittests/ValueObject/DILLexerTests.cpp | 193 +
 4 files changed, 555 insertions(+)
 create mode 100644 lldb/include/lldb/ValueObject/DILLexer.h
 create mode 100644 lldb/source/ValueObject/DILLexer.cpp
 create mode 100644 lldb/unittests/ValueObject/DILLexerTests.cpp

diff --git a/lldb/include/lldb/ValueObject/DILLexer.h 
b/lldb/include/lldb/ValueObject/DILLexer.h
new file mode 100644
index 00..45c506b2f4106d
--- /dev/null
+++ b/lldb/include/lldb/ValueObject/DILLexer.h
@@ -0,0 +1,156 @@
+//===-- DILLexer.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLDB_VALUEOBJECT_DILLEXER_H_
+#define LLDB_VALUEOBJECT_DILLEXER_H_
+
+#include "llvm/ADT/StringRef.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace lldb_private {
+
+namespace dil {
+
+enum class TokenKind {
+  coloncolon,
+  eof,
+  identifier,
+  invalid,
+  kw_namespace,
+  l_paren,
+  none,
+  r_paren,
+  unknown,
+};
+
+/// Class defining the tokens generated by the DIL lexer and used by the
+/// DIL parser.
+class DILToken {
+public:
+  DILToken(dil::TokenKind kind, std::string spelling, uint32_t start)
+  : m_kind(kind), m_spelling(spelling), m_start_pos(start) {}
+
+  DILToken() : m_kind(dil::TokenKind::none), m_spelling(""), m_start_pos(0) {}
+
+  void setKind(dil::TokenKind kind) { m_kind = kind; }
+  dil::TokenKind getKind() const { return m_kind; }
+
+  std::string getSpelling() const { return m_spelling; }
+
+  uint32_t getLength() const { return m_spelling.size(); }
+
+  bool is(dil::TokenKind kind) const { return m_kind == kind; }
+
+  bool isNot(dil::TokenKind kind) const { return m_kind != kind; }
+
+  bool isOneOf(dil::TokenKind kind1, dil::TokenKind kind2) const {
+return is(kind1) || is(kind2);
+  }
+
+  template  bool isOneOf(dil::TokenKind kind, Ts... Ks) const {
+return is(kind) || isOneOf(Ks...);
+  }
+
+  uint32_t getLocation() const { return m_start_pos; }
+
+  void setValues(dil::TokenKind kind, std::string spelling, uint32_t start) {
+m_kind = kind;
+m_spelling = spelling;
+m_start_pos = start;
+  }
+
+  static const std::string getTokenName(dil::TokenKind kind);
+
+private:
+  dil::TokenKind m_kind;
+  std::string m_spelling;
+  uint32_t m_start_pos; // within entire expression string
+};
+
+/// Class for doing the simple lexing required by DIL.
+class DILLexer {
+public:
+  DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr.str()) {
+m_cur_pos = m_expr.begin();
+// Use UINT_MAX to indicate invalid/uninitialized value.
+m_tokens_idx = UINT_MAX;
+  }
+
+  bool Lex(DILToken &result, bool look_ahead = false);
+
+  bool Is_Word(std::string::iterator start, uint32_t &length);
+
+  uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); }
+
+  /// Update 'result' with the other parameter values, create a
+  /// duplicate token, and push the duplicate token onto the vector of
+  /// lexed tokens.
+  void UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind,
+ std::string tok_str, uint32_t tok_pos);
+
+  /// Return the lexed token N+1 positions ahead of the 'current' token
+  /// being handled by the DIL parser.
+  const DILToken &LookAhead(uint32_t N);
+
+  const DILToken &AcceptLookAhead(uint32_t N);
+
+  /// Return the index for the 'current' token being handled by the DIL parser.
+  uint32_t GetCurrentTokenIdx() { return m_tokens_idx; }
+
+  /// Return the current token to be handled by the DIL parser.
+  DILToken &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; }
+
+  /// Update the index for

[Lldb-commits] [lldb] [LLDB] Add Lexer (with tests) for DIL (Data Inspection Language). (PR #123521)

2025-01-19 Thread via lldb-commits

llvmbot wrote:




@llvm/pr-subscribers-lldb

Author: None (cmtice)


Changes

This adds the basic lexer, with unittests, for the Data Inspection Language 
(DIL) -- see
https://discourse.llvm.org/t/rfc-data-inspection-language/69893

This version of the lexer only handles local variables and namespaces, and is 
designed to work with
https://github.com/llvm/llvm-project/pull/120971.

---
Full diff: https://github.com/llvm/llvm-project/pull/123521.diff


4 Files Affected:

- (added) lldb/include/lldb/ValueObject/DILLexer.h (+156) 
- (added) lldb/source/ValueObject/DILLexer.cpp (+205) 
- (modified) lldb/unittests/ValueObject/CMakeLists.txt (+1) 
- (added) lldb/unittests/ValueObject/DILLexerTests.cpp (+193) 


``diff
diff --git a/lldb/include/lldb/ValueObject/DILLexer.h 
b/lldb/include/lldb/ValueObject/DILLexer.h
new file mode 100644
index 00..45c506b2f4106d
--- /dev/null
+++ b/lldb/include/lldb/ValueObject/DILLexer.h
@@ -0,0 +1,156 @@
+//===-- DILLexer.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLDB_VALUEOBJECT_DILLEXER_H_
+#define LLDB_VALUEOBJECT_DILLEXER_H_
+
+#include "llvm/ADT/StringRef.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace lldb_private {
+
+namespace dil {
+
+enum class TokenKind {
+  coloncolon,
+  eof,
+  identifier,
+  invalid,
+  kw_namespace,
+  l_paren,
+  none,
+  r_paren,
+  unknown,
+};
+
+/// Class defining the tokens generated by the DIL lexer and used by the
+/// DIL parser.
+class DILToken {
+public:
+  DILToken(dil::TokenKind kind, std::string spelling, uint32_t start)
+  : m_kind(kind), m_spelling(spelling), m_start_pos(start) {}
+
+  DILToken() : m_kind(dil::TokenKind::none), m_spelling(""), m_start_pos(0) {}
+
+  void setKind(dil::TokenKind kind) { m_kind = kind; }
+  dil::TokenKind getKind() const { return m_kind; }
+
+  std::string getSpelling() const { return m_spelling; }
+
+  uint32_t getLength() const { return m_spelling.size(); }
+
+  bool is(dil::TokenKind kind) const { return m_kind == kind; }
+
+  bool isNot(dil::TokenKind kind) const { return m_kind != kind; }
+
+  bool isOneOf(dil::TokenKind kind1, dil::TokenKind kind2) const {
+return is(kind1) || is(kind2);
+  }
+
+  template  bool isOneOf(dil::TokenKind kind, Ts... Ks) const {
+return is(kind) || isOneOf(Ks...);
+  }
+
+  uint32_t getLocation() const { return m_start_pos; }
+
+  void setValues(dil::TokenKind kind, std::string spelling, uint32_t start) {
+m_kind = kind;
+m_spelling = spelling;
+m_start_pos = start;
+  }
+
+  static const std::string getTokenName(dil::TokenKind kind);
+
+private:
+  dil::TokenKind m_kind;
+  std::string m_spelling;
+  uint32_t m_start_pos; // within entire expression string
+};
+
+/// Class for doing the simple lexing required by DIL.
+class DILLexer {
+public:
+  DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr.str()) {
+m_cur_pos = m_expr.begin();
+// Use UINT_MAX to indicate invalid/uninitialized value.
+m_tokens_idx = UINT_MAX;
+  }
+
+  bool Lex(DILToken &result, bool look_ahead = false);
+
+  bool Is_Word(std::string::iterator start, uint32_t &length);
+
+  uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); }
+
+  /// Update 'result' with the other parameter values, create a
+  /// duplicate token, and push the duplicate token onto the vector of
+  /// lexed tokens.
+  void UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind,
+ std::string tok_str, uint32_t tok_pos);
+
+  /// Return the lexed token N+1 positions ahead of the 'current' token
+  /// being handled by the DIL parser.
+  const DILToken &LookAhead(uint32_t N);
+
+  const DILToken &AcceptLookAhead(uint32_t N);
+
+  /// Return the index for the 'current' token being handled by the DIL parser.
+  uint32_t GetCurrentTokenIdx() { return m_tokens_idx; }
+
+  /// Return the current token to be handled by the DIL parser.
+  DILToken &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; }
+
+  /// Update the index for the 'current' token, to point to the next lexed
+  /// token.
+  bool IncrementTokenIdx() {
+if (m_tokens_idx >= m_lexed_tokens.size() - 1)
+  return false;
+
+m_tokens_idx++;
+return true;
+  }
+
+  /// Set the index for the 'current' token (to be handled by the parser)
+  /// to a particular position. Used for either committing 'look ahead' parsing
+  /// or rolling back tentative parsing.
+  bool ResetTokenIdx(uint32_t new_value) {
+if (new_value > m_lexed_tokens.size() - 1)
+  return false;
+
+m_tokens_idx = new_value;
+return true;
+  }
+
+private:
+  // The input string we are lexing & parsing.
+  std::string m_expr;
+
+  

[Lldb-commits] [lldb] [LLDB] Add Lexer (with tests) for DIL (Data Inspection Language). (PR #123521)

2025-01-19 Thread via lldb-commits

https://github.com/cmtice updated 
https://github.com/llvm/llvm-project/pull/123521

>From 468f73f8539dcb8addf8ed9618d9eb797dabbb01 Mon Sep 17 00:00:00 2001
From: Caroline Tice 
Date: Sun, 19 Jan 2025 09:15:34 -0800
Subject: [PATCH 1/2] [LLDB] Add Lexer (with tests) for DIL (Data Inspection
 Language).

This adds the basic lexer, with unittests, for the Data Inspection
Language (DIL) -- see
https://discourse.llvm.org/t/rfc-data-inspection-language/69893

This version of the lexer only handles local variables and namespaces,
and is designed to work with
https://github.com/llvm/llvm-project/pull/120971.
---
 lldb/include/lldb/ValueObject/DILLexer.h | 156 ++
 lldb/source/ValueObject/DILLexer.cpp | 205 +++
 lldb/unittests/ValueObject/CMakeLists.txt|   1 +
 lldb/unittests/ValueObject/DILLexerTests.cpp | 193 +
 4 files changed, 555 insertions(+)
 create mode 100644 lldb/include/lldb/ValueObject/DILLexer.h
 create mode 100644 lldb/source/ValueObject/DILLexer.cpp
 create mode 100644 lldb/unittests/ValueObject/DILLexerTests.cpp

diff --git a/lldb/include/lldb/ValueObject/DILLexer.h 
b/lldb/include/lldb/ValueObject/DILLexer.h
new file mode 100644
index 00..45c506b2f4106d
--- /dev/null
+++ b/lldb/include/lldb/ValueObject/DILLexer.h
@@ -0,0 +1,156 @@
+//===-- DILLexer.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLDB_VALUEOBJECT_DILLEXER_H_
+#define LLDB_VALUEOBJECT_DILLEXER_H_
+
+#include "llvm/ADT/StringRef.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace lldb_private {
+
+namespace dil {
+
+enum class TokenKind {
+  coloncolon,
+  eof,
+  identifier,
+  invalid,
+  kw_namespace,
+  l_paren,
+  none,
+  r_paren,
+  unknown,
+};
+
+/// Class defining the tokens generated by the DIL lexer and used by the
+/// DIL parser.
+class DILToken {
+public:
+  DILToken(dil::TokenKind kind, std::string spelling, uint32_t start)
+  : m_kind(kind), m_spelling(spelling), m_start_pos(start) {}
+
+  DILToken() : m_kind(dil::TokenKind::none), m_spelling(""), m_start_pos(0) {}
+
+  void setKind(dil::TokenKind kind) { m_kind = kind; }
+  dil::TokenKind getKind() const { return m_kind; }
+
+  std::string getSpelling() const { return m_spelling; }
+
+  uint32_t getLength() const { return m_spelling.size(); }
+
+  bool is(dil::TokenKind kind) const { return m_kind == kind; }
+
+  bool isNot(dil::TokenKind kind) const { return m_kind != kind; }
+
+  bool isOneOf(dil::TokenKind kind1, dil::TokenKind kind2) const {
+return is(kind1) || is(kind2);
+  }
+
+  template  bool isOneOf(dil::TokenKind kind, Ts... Ks) const {
+return is(kind) || isOneOf(Ks...);
+  }
+
+  uint32_t getLocation() const { return m_start_pos; }
+
+  void setValues(dil::TokenKind kind, std::string spelling, uint32_t start) {
+m_kind = kind;
+m_spelling = spelling;
+m_start_pos = start;
+  }
+
+  static const std::string getTokenName(dil::TokenKind kind);
+
+private:
+  dil::TokenKind m_kind;
+  std::string m_spelling;
+  uint32_t m_start_pos; // within entire expression string
+};
+
+/// Class for doing the simple lexing required by DIL.
+class DILLexer {
+public:
+  DILLexer(llvm::StringRef dil_expr) : m_expr(dil_expr.str()) {
+m_cur_pos = m_expr.begin();
+// Use UINT_MAX to indicate invalid/uninitialized value.
+m_tokens_idx = UINT_MAX;
+  }
+
+  bool Lex(DILToken &result, bool look_ahead = false);
+
+  bool Is_Word(std::string::iterator start, uint32_t &length);
+
+  uint32_t GetLocation() { return m_cur_pos - m_expr.begin(); }
+
+  /// Update 'result' with the other parameter values, create a
+  /// duplicate token, and push the duplicate token onto the vector of
+  /// lexed tokens.
+  void UpdateLexedTokens(DILToken &result, dil::TokenKind tok_kind,
+ std::string tok_str, uint32_t tok_pos);
+
+  /// Return the lexed token N+1 positions ahead of the 'current' token
+  /// being handled by the DIL parser.
+  const DILToken &LookAhead(uint32_t N);
+
+  const DILToken &AcceptLookAhead(uint32_t N);
+
+  /// Return the index for the 'current' token being handled by the DIL parser.
+  uint32_t GetCurrentTokenIdx() { return m_tokens_idx; }
+
+  /// Return the current token to be handled by the DIL parser.
+  DILToken &GetCurrentToken() { return m_lexed_tokens[m_tokens_idx]; }
+
+  /// Update the index for the 'current' token, to point to the next lexed
+  /// token.
+  bool IncrementTokenIdx() {
+if (m_tokens_idx >= m_lexed_tokens.size() - 1)
+  return false;
+
+m_tokens_idx++;
+return true;
+  }
+
+  /// Set the index for the 'current' token (to be handled by the parser)
+  /// to a par

[Lldb-commits] [lldb] [LLDB] Add Lexer (with tests) for DIL (Data Inspection Language). (PR #123521)

2025-01-19 Thread via lldb-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


``bash
git-clang-format --diff 23a239267e8a1d20ed10d3545feaf2a2bb70b085 
61a2607a70d90688d395321e846a3be58ccbebcb --extensions h,cpp -- 
lldb/include/lldb/ValueObject/DILLexer.h lldb/source/ValueObject/DILLexer.cpp 
lldb/unittests/ValueObject/DILLexerTests.cpp
``





View the diff from clang-format here.


``diff
diff --git a/lldb/unittests/ValueObject/DILLexerTests.cpp 
b/lldb/unittests/ValueObject/DILLexerTests.cpp
index ec6ff86b64..11fc5f94fe 100644
--- a/lldb/unittests/ValueObject/DILLexerTests.cpp
+++ b/lldb/unittests/ValueObject/DILLexerTests.cpp
@@ -142,28 +142,10 @@ TEST(DILLexerTests, MultiTokenLexTest) {
 
 TEST(DILLexerTests, IdentifiersTest) {
   std::vector valid_identifiers = {
-"$My_name1",
-"$pc",
-"abcd",
-"ab cd",
-"_",
-"_a",
-"_a_",
-"a_b",
-"this",
-"self",
-"a",
-"MyName"
-  };
+  "$My_name1", "$pc", "abcd", "ab cd", "_", "_a",
+  "_a_",   "a_b", "this", "self",  "a", "MyName"};
   std::vector invalid_identifiers = {
-"234",
-"2a",
-"2",
-"$",
-"1MyName",
-"",
-"namespace"
-  };
+  "234", "2a", "2", "$", "1MyName", "", "namespace"};
 
   // Verify that all of the valid identifiers come out as identifier tokens.
   for (auto str : valid_identifiers) {

``




https://github.com/llvm/llvm-project/pull/123521
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [lldb] [lldb] Support format string in the prompt (PR #123430)

2025-01-19 Thread Pavel Labath via lldb-commits


@@ -69,6 +69,22 @@ def test_prompt_color(self):
 # Column: 16.8
 self.child.expect(re.escape("\x1b[31m(lldb) \x1b[0m\x1b[8G"))
 
+@skipIfAsan
+@skipIfEditlineSupportMissing
+def test_prompt_format_color(self):
+"""Test that we can change the prompt color with a format string."""
+self.launch(use_colors=True)
+# Clear the prefix and suffix setting to simplify the output.
+self.child.send('settings set prompt-ansi-prefix ""\n')
+self.child.send('settings set prompt-ansi-suffix ""\n')

labath wrote:

```suggestion
self.expect('settings set prompt-ansi-prefix ""')
self.expect('settings set prompt-ansi-suffix ""')
```

This makes sure lldb processes the command before you give it the next one.

https://github.com/llvm/llvm-project/pull/123430
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [lldb] [lldb] Support format string in the prompt (PR #123430)

2025-01-19 Thread Pavel Labath via lldb-commits

https://github.com/labath approved this pull request.


https://github.com/llvm/llvm-project/pull/123430
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [lldb] [lldb] Support format string in the prompt (PR #123430)

2025-01-19 Thread Pavel Labath via lldb-commits


@@ -171,7 +171,45 @@ inline std::string FormatAnsiTerminalCodes(llvm::StringRef 
format,
   }
   return fmt;
 }
+
+inline std::string StripAnsiTerminalCodes(llvm::StringRef str) {
+  std::string stripped;
+  while (!str.empty()) {
+llvm::StringRef left, right;
+
+std::tie(left, right) = str.split(ANSI_ESC_START);
+stripped += left;
+
+// ANSI_ESC_START not found.
+if (left == str && right.empty())
+  break;
+
+auto end = llvm::StringRef::npos;
+for (size_t i = 0; i < right.size(); i++) {
+  char c = right[i];
+  if (c == 'm' || c == 'G') {
+end = i;
+break;
+  }
+  if (isdigit(c) || c == ';')
+continue;
+
+  break;
+}
+
+// ANSI_ESC_END not found.
+if (end != llvm::StringRef::npos) {
+  str = right.substr(end + 1);
+  continue;
+}
+
+stripped += ANSI_ESC_START;
+str = right;

labath wrote:

I think it should be equivalent, but a little less lumberjacky.

```suggestion
size_t end = right.find_first_not_of("0123456789;");
if (end < right.size() && (right[end] == 'm' || right[end] == 'G')) {
  str = right.substr(end + 1);
} else {
  // ANSI_ESC_END not found.
  stripped += ANSI_ESC_START;
  str = right;
}
```

https://github.com/llvm/llvm-project/pull/123430
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [lldb] [lldb] Support format string in the prompt (PR #123430)

2025-01-19 Thread Pavel Labath via lldb-commits


@@ -69,6 +69,22 @@ def test_prompt_color(self):
 # Column: 16.8
 self.child.expect(re.escape("\x1b[31m(lldb) \x1b[0m\x1b[8G"))
 
+@skipIfAsan
+@skipIfEditlineSupportMissing
+def test_prompt_format_color(self):
+"""Test that we can change the prompt color with a format string."""
+self.launch(use_colors=True)
+# Clear the prefix and suffix setting to simplify the output.
+self.child.send('settings set prompt-ansi-prefix ""\n')
+self.child.send('settings set prompt-ansi-suffix ""\n')
+self.child.send('settings set prompt 
"${ansi.fg.red}(lldb)${ansi.normal} "\n')
+self.child.send("foo")
+# Make sure this change is reflected immediately. Check that the color
+# is set (31) and the cursor position (8) is correct.
+# Prompt: (lldb) _
+# Column: 16.8
+self.child.expect(re.escape("\x1b[31m(lldb)\x1b[0m foo"))

labath wrote:

The precommit failure appears to be genuine. The required string is present in 
the output, but for some reason this line starts searching after it. My 
suggested change may or may not fix it.

https://github.com/llvm/llvm-project/pull/123430
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [lldb] [lldb] Implement ${target.file} format variable (PR #123431)

2025-01-19 Thread Pavel Labath via lldb-commits


@@ -1469,6 +1472,19 @@ bool FormatEntity::Format(const Entry &entry, Stream &s,
 }
 return false;
 
+  case Entry::Type::TargetFile:
+if (exe_ctx) {
+  Target *target = exe_ctx->GetTargetPtr();
+  if (target) {
+Module *exe_module = target->GetExecutableModulePointer();
+if (exe_module) {

labath wrote:

```suggestion
  if (Target *target = exe_ctx->GetTargetPtr()) {
if (Module *exe_module = target->GetExecutableModulePointer()) {
```

https://github.com/llvm/llvm-project/pull/123431
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [lldb] [lldb] Implement ${target.file} format variable (PR #123431)

2025-01-19 Thread Pavel Labath via lldb-commits

https://github.com/labath approved this pull request.


https://github.com/llvm/llvm-project/pull/123431
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [lldb] [lldb] Implement ${target.file} format variable (PR #123431)

2025-01-19 Thread Pavel Labath via lldb-commits


@@ -141,6 +141,10 @@ A complete list of currently supported format string 
variables is listed below:
 
+---+-+
 | ``target.arch``   | The architecture of the 
current target  


|
 
+---+-+
+| ``target.file.basename``  | The basename of the 
current current target  


|
++---+-+
+| ``target.file.fullpath``  | The path of the current 
current target  


|

labath wrote:

```suggestion
| ``target.file.basename``  | The basename of the 
current target  


|
+---+-+
| ``target.file.fullpath``  | The path of the current 
target  


|
```

https://github.com/llvm/llvm-project/pull/123431
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [clang] [lldb] [llvm] [StrTable] Switch intrinsics to StringTable and work around MSVC (PR #123548)

2025-01-19 Thread via lldb-commits

llvmbot wrote:



@llvm/pr-subscribers-llvm-adt

@llvm/pr-subscribers-clang

Author: Chandler Carruth (chandlerc)


Changes

**Note:** This PR depends on #123302 and #123308 -- only the 
last of the three commits should be reviewed here.

---

Historically, the main example of *very* large string tables used the
`EmitCharArray` to work around MSVC limitations with string literals,
but that was switched (without removing the API) in order to consolidate
on a nicer emission primitive.

While this large string table in `IntrinsicsImpl.inc` seems to compile
correctly on MSVC without the work around in `EmitCharArray` (and that
this PR adds back to the nicer emission path), other users have
repeatedly hit this MSVC limitation as you can see in the discussion on
PR https://github.com/llvm/llvm-project/pull/120534. This PR teaches the string 
offset table emission to look at
the size of the table and switch to the char array emission strategy
when the table becomes too large.

This workaround does have the downside of making compile times worse
for large string tables, but that appears unavoidable until we can
identify known good MSVC versions and switch to requiring them for all
LLVM users. It also reduces searchability of the generated string table
-- I looked at emitting a comment with each string but it is tricky
because the escaping rules for an inline comment are different from
those of a string literal, and there's no real way to turn the string
literal into a comment.

This PR also switches the `IntrinsicsImpl.inc` string tables over to the
new `StringTable` runtime abstraction. I didn't want to do this until
landing the MSVC workaround in case it caused even this example to start
hitting the MSVC bug, but I wanted to switch here so that I could
simplify the API for emitting the string table with the workaround
present. With the two different emission strategies, it's important to
use a very exact syntax and that seems better encapsulated in the API.

This PR should unblock landing https://github.com/llvm/llvm-project/pull/120534 
and letting us switch all of
Clang's builtins to use string tables. That PR has all the details
motivating the overall effort.

Follow-up patches will try to consolidate the remaining users onto the
single interface, but those at least were easy to separate into
follow-ups and keep this PR somewhat smaller.

---

Patch is 37.28 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/123548.diff


15 Files Affected:

- (modified) clang/lib/Basic/DiagnosticIDs.cpp (+8-10) 
- (modified) clang/lib/Frontend/CompilerInvocation.cpp (+1-1) 
- (modified) clang/tools/diagtool/DiagnosticNames.cpp (+2-1) 
- (modified) clang/utils/TableGen/ClangDiagnosticsEmitter.cpp (+6-18) 
- (modified) lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp (+2-1) 
- (modified) llvm/include/llvm/ADT/StringTable.h (+49-1) 
- (modified) llvm/include/llvm/Option/OptTable.h (+38-29) 
- (modified) llvm/include/llvm/TableGen/StringToOffsetTable.h (+51-31) 
- (modified) llvm/lib/IR/Intrinsics.cpp (+10-9) 
- (modified) llvm/lib/Option/OptTable.cpp (+36-34) 
- (modified) llvm/test/TableGen/MixedCasedMnemonic.td (+1-1) 
- (modified) llvm/tools/llvm-objdump/llvm-objdump.cpp (+2-1) 
- (modified) llvm/unittests/Option/OptionMarshallingTest.cpp (+2-1) 
- (modified) llvm/utils/TableGen/Basic/IntrinsicEmitter.cpp (+3-4) 
- (modified) llvm/utils/TableGen/OptionParserEmitter.cpp (+5-8) 


``diff
diff --git a/clang/lib/Basic/DiagnosticIDs.cpp 
b/clang/lib/Basic/DiagnosticIDs.cpp
index d77f28c80b2eb2..55f868147134b7 100644
--- a/clang/lib/Basic/DiagnosticIDs.cpp
+++ b/clang/lib/Basic/DiagnosticIDs.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringTable.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Path.h"
 #include 
@@ -618,11 +619,7 @@ namespace {
 uint16_t SubGroups;
 StringRef Documentation;
 
-// String is stored with a pascal-style length byte.
-StringRef getName() const {
-  return StringRef(DiagGroupNames + NameOffset + 1,
-   DiagGroupNames[NameOffset]);
-}
+StringRef getName() const { return DiagGroupNames[NameOffset]; }
   };
 }
 
@@ -669,11 +666,12 @@ StringRef DiagnosticIDs::getWarningOptionForDiag(unsigned 
DiagID) {
 
 std::vector DiagnosticIDs::getDiagnosticFlags() {
   std::vector Res{"-W", "-Wno-"};
-  for (size_t I = 1; DiagGroupNames[I] != '\0';) {
-std::string Diag(DiagGroupNames + I + 1, DiagGroupNames[I]);
-I += DiagGroupNames[I] + 1;
-Res.push_back("-W" + Diag);
-Res.push_back("-Wno-" + Diag);
+  for (StringRef Name : DiagGroupNames) {
+if (Name.empty())
+  continue;
+
+Res.push_back((Twine("-W") + Name).str());
+Res.push_back((Twine("-Wno-") + Name).str());
   }
 
   return Res;
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp 
b/clang/lib/Front

[Lldb-commits] [clang] [lldb] [llvm] Patch series to reapply #118734 and substantially improve it (PR #120534)

2025-01-19 Thread Chandler Carruth via lldb-commits

chandlerc wrote:

> Some good news, everything seems to pass after your latest changes in this 
> PR! I didn't believe it at first and did a clean rebuild and test to verify. 
> In the end everything passed again.
> 
> That being said, I am working on deploying an updated version of VS2019 to 
> our internal builders. I have gotten agreement with the plan, the only 
> possible hold-up is that we would need to rebuild our internal builders which 
> likely may not get done for a few weeks. I can apply quick work-arounds to 
> the machines, but they ultimately will need to be rebuilt.
> 
> Would you prefer to move forward with the solution currently implemented in 
> this PR or for us to update our internal builders?

Let's see if we can land the solution in this PR. Now that it's done, it'll 
remove all time pressure on upgrading.

Mostly need code review on all the factored out PRs, currently:

- [ ] #123308 
- [ ] #123302
- [ ] #123548 
- [ ] #122873 
- [ ] #123460

Once all of those land, I can polish this up a bit to make it easier to review 
the latest version.

https://github.com/llvm/llvm-project/pull/120534
___
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits


[Lldb-commits] [clang] [lldb] [llvm] [StrTable] Switch intrinsics to StringTable and work around MSVC (PR #123548)

2025-01-19 Thread Chandler Carruth via lldb-commits

https://github.com/chandlerc created 
https://github.com/llvm/llvm-project/pull/123548

**Note:** This PR depends on #123302 and #123308 -- only the last of the three 
commits should be reviewed here.

---

Historically, the main example of *very* large string tables used the
`EmitCharArray` to work around MSVC limitations with string literals,
but that was switched (without removing the API) in order to consolidate
on a nicer emission primitive.

While this large string table in `IntrinsicsImpl.inc` seems to compile
correctly on MSVC without the work around in `EmitCharArray` (and that
this PR adds back to the nicer emission path), other users have
repeatedly hit this MSVC limitation as you can see in the discussion on
PR https://github.com/llvm/llvm-project/pull/120534. This PR teaches the string 
offset table emission to look at
the size of the table and switch to the char array emission strategy
when the table becomes too large.

This workaround does have the downside of making compile times worse
for large string tables, but that appears unavoidable until we can
identify known good MSVC versions and switch to requiring them for all
LLVM users. It also reduces searchability of the generated string table
-- I looked at emitting a comment with each string but it is tricky
because the escaping rules for an inline comment are different from
those of a string literal, and there's no real way to turn the string
literal into a comment.

This PR also switches the `IntrinsicsImpl.inc` string tables over to the
new `StringTable` runtime abstraction. I didn't want to do this until
landing the MSVC workaround in case it caused even this example to start
hitting the MSVC bug, but I wanted to switch here so that I could
simplify the API for emitting the string table with the workaround
present. With the two different emission strategies, it's important to
use a very exact syntax and that seems better encapsulated in the API.

This PR should unblock landing https://github.com/llvm/llvm-project/pull/120534 
and letting us switch all of
Clang's builtins to use string tables. That PR has all the details
motivating the overall effort.

Follow-up patches will try to consolidate the remaining users onto the
single interface, but those at least were easy to separate into
follow-ups and keep this PR somewhat smaller.

>From 9a525fa4322d6a46154305097185ec017916d01f Mon Sep 17 00:00:00 2001
From: Chandler Carruth 
Date: Fri, 17 Jan 2025 08:50:44 +
Subject: [PATCH 1/3] [StrTable] Switch the option parser to
 `llvm::StringTable`

Now that we have a dedicated abstraction for string tables, switch the
option parser library's string table over to it rather than using a raw
`const char*`. Also try to use the `StringTable::Offset` type rather
than a raw `unsigned` where we can to avoid accidental increments or
other issues.

This is based on review feedback for the initial switch of options to
a string table. Happy to tweak or adjust if desired here.
---
 clang/lib/Frontend/CompilerInvocation.cpp |  2 +-
 .../Platform/MacOSX/PlatformDarwin.cpp|  3 +-
 llvm/include/llvm/Option/OptTable.h   | 67 ++
 llvm/lib/Option/OptTable.cpp  | 70 ++-
 llvm/tools/llvm-objdump/llvm-objdump.cpp  |  3 +-
 .../Option/OptionMarshallingTest.cpp  |  3 +-
 llvm/utils/TableGen/OptionParserEmitter.cpp   | 11 +--
 7 files changed, 88 insertions(+), 71 deletions(-)

diff --git a/clang/lib/Frontend/CompilerInvocation.cpp 
b/clang/lib/Frontend/CompilerInvocation.cpp
index 58658dedbaf1ee..3bf124e4827be9 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -282,7 +282,7 @@ using ArgumentConsumer = 
CompilerInvocation::ArgumentConsumer;
 #undef OPTTABLE_STR_TABLE_CODE
 
 static llvm::StringRef lookupStrInTable(unsigned Offset) {
-  return &OptionStrTable[Offset];
+  return OptionStrTable[Offset];
 }
 
 #define SIMPLE_ENUM_VALUE_TABLE
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp 
b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
index 2a36f95c94d0ce..51e9a6d81b8390 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
@@ -42,6 +42,7 @@
 #include "lldb/Utility/Status.h"
 #include "lldb/Utility/Timer.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringTable.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Threading.h"
@@ -1083,7 +1084,7 @@ void 
PlatformDarwin::AddClangModuleCompilationOptionsForSDKType(
   if (!version.empty() && sdk_type != XcodeSDK::Type::Linux &&
   sdk_type != XcodeSDK::Type::XROS) {
 #define OPTION(PREFIX_OFFSET, NAME_OFFSET, VAR, ...)   
\
-  llvm::StringRef opt_##VAR = &OptionStrTable[NAME_OFFSET];
\
+  llvm::StringRef opt_##VAR = OptionStrTable[NAME_OFFSET]; 
\
   (void)opt_##V