https://github.com/JDevlieghere updated https://github.com/llvm/llvm-project/pull/181279

>From 3a08e2aa927886dbd7b4d133a0b6a376d69d3094 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <[email protected]>
Date: Thu, 12 Feb 2026 16:23:00 -0800
Subject: [PATCH 1/4] [lldb] Add tree-sitter based syntax highlighting

This adds the necessary infrastructure to use tree-sitter for syntax
highlighting in LLDB. It provides the base class for a tree-sitter
highlighter plugin. Its primary function is interfacing with the
tree-sitter library, and converting captures to highlighting styles.

Adding a new tree-sitter highlighter consists of creating an LLDB plugin
that inherits from this class. The plugin has two core responsibilities:

1. Loading the tree-sitter grammar.
2. Specifying the tree-sitter syntax highlighting query.

Everything else is handled by the base class, making it extremely easy
to add a new language.

For more context and the motivation behind using tree-sitter for syntax
highlighting, see #170250.
---
 lldb/cmake/modules/FindTreeSitter.cmake       |  18 ++
 lldb/cmake/modules/LLDBConfig.cmake           |   1 +
 .../source/Plugins/Highlighter/CMakeLists.txt |   3 +
 .../Highlighter/TreeSitter/CMakeLists.txt     |  10 +
 .../TreeSitter/TreeSitterHighlighter.cpp      | 231 ++++++++++++++++++
 .../TreeSitter/TreeSitterHighlighter.h        |  77 ++++++
 6 files changed, 340 insertions(+)
 create mode 100644 lldb/cmake/modules/FindTreeSitter.cmake
 create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
 create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp
 create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h

diff --git a/lldb/cmake/modules/FindTreeSitter.cmake b/lldb/cmake/modules/FindTreeSitter.cmake
new file mode 100644
index 0000000000000..04a40507cc9ab
--- /dev/null
+++ b/lldb/cmake/modules/FindTreeSitter.cmake
@@ -0,0 +1,18 @@
+# FindTreeSitter.cmake
+
+include(FindPackageHandleStandardArgs)
+
+find_path(TreeSitter_INCLUDE_DIR
+  NAMES tree_sitter/api.h)
+
+find_library(TreeSitter_LIBRARY
+  NAMES tree-sitter treesitter)
+
+find_package_handle_standard_args(TreeSitter
+  REQUIRED_VARS TreeSitter_LIBRARY TreeSitter_INCLUDE_DIR
+)
+
+mark_as_advanced(
+  TreeSitter_INCLUDE_DIR
+  TreeSitter_LIBRARY
+)
diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake
index d4471b8a5418d..f00824d692fe1 100644
--- a/lldb/cmake/modules/LLDBConfig.cmake
+++ b/lldb/cmake/modules/LLDBConfig.cmake
@@ -64,6 +64,7 @@ add_optional_dependency(LLDB_ENABLE_LUA "Enable Lua scripting support in LLDB" L
 add_optional_dependency(LLDB_ENABLE_PYTHON "Enable Python scripting support in LLDB" PythonAndSwig PYTHONANDSWIG_FOUND)
 add_optional_dependency(LLDB_ENABLE_LIBXML2 "Enable Libxml 2 support in LLDB" LibXml2 LIBXML2_FOUND VERSION ${LLDB_LIBXML2_VERSION})
 add_optional_dependency(LLDB_ENABLE_FBSDVMCORE "Enable libfbsdvmcore support in LLDB" FBSDVMCore FBSDVMCore_FOUND QUIET)
+add_optional_dependency(LLDB_ENABLE_TREESITTER "Enable Tree-sitter syntax highlighting" TreeSitter TREESITTER_FOUND)
 
 option(LLDB_USE_ENTITLEMENTS "When codesigning, use entitlements if available" ON)
 option(LLDB_BUILD_FRAMEWORK "Build LLDB.framework (Darwin only)" OFF)
diff --git a/lldb/source/Plugins/Highlighter/CMakeLists.txt b/lldb/source/Plugins/Highlighter/CMakeLists.txt
index 88033867b1a12..704ea5f145794 100644
--- a/lldb/source/Plugins/Highlighter/CMakeLists.txt
+++ b/lldb/source/Plugins/Highlighter/CMakeLists.txt
@@ -1,2 +1,5 @@
+if (LLDB_ENABLE_TREESITTER)
+  add_subdirectory(TreeSitter)
+endif()
 add_subdirectory(Clang)
 add_subdirectory(Default)
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
new file mode 100644
index 0000000000000..99c03da999720
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_lldb_library(lldbTreeSitter
+  TreeSitterHighlighter.cpp
+
+  LINK_COMPONENTS
+    Support
+    ADT
+  LINK_LIBS
+    lldbUtility
+    ${TreeSitter_LIBRARY}
+)
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp
new file mode 100644
index 0000000000000..181dc90683597
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp
@@ -0,0 +1,231 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TreeSitterHighlighter.h"
+#include "lldb/Utility/LLDBLog.h"
+#include "lldb/Utility/Log.h"
+#include "lldb/Utility/StreamString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+
+using namespace lldb_private;
+
+TreeSitterHighlighter::TSState::~TSState() {
+  if (query)
+    ts_query_delete(query);
+  if (parser)
+    ts_parser_delete(parser);
+}
+
+TreeSitterHighlighter::TSState::operator bool() const {
+  return parser && query;
+}
+
+TreeSitterHighlighter::TSState &TreeSitterHighlighter::GetTSState() const {
+  if (m_ts_state)
+    return *m_ts_state;
+
+  Log *log = GetLog(LLDBLog::Source);
+
+  m_ts_state.emplace();
+  m_ts_state->parser = ts_parser_new();
+  if (!m_ts_state->parser) {
+    LLDB_LOG(log, "Creating tree-sitter parser failed for {0}", GetName());
+    return *m_ts_state;
+  }
+
+  const TSLanguage *language = GetLanguage();
+  if (!language || !ts_parser_set_language(m_ts_state->parser, language)) {
+    LLDB_LOG(log, "Creating tree-sitter language failed for {0}", GetName());
+    return *m_ts_state;
+  }
+
+  llvm::StringRef query_source = GetHighlightQuery();
+  uint32_t error_offset = 0;
+  TSQueryError error_type = TSQueryErrorNone;
+  m_ts_state->query = ts_query_new(language, query_source.data(),
+                                   static_cast<uint32_t>(query_source.size()),
+                                   &error_offset, &error_type);
+  if (!m_ts_state->query || error_type != TSQueryErrorNone) {
+    LLDB_LOG(log,
+             "Creating tree-sitter query failed for {0} with error {1}: {2}",
+             GetName(), error_type, query_source.substr(error_offset, 64));
+    // If we have an error but a valid query, we need to reset the object to
+    // (1) avoid it looking valid and (2) release the parser.
+    m_ts_state.emplace();
+  }
+
+  return *m_ts_state;
+}
+
+const HighlightStyle::ColorStyle *
+TreeSitterHighlighter::GetStyleForCapture(llvm::StringRef capture_name,
+                                          const HighlightStyle &options) const {
+  return llvm::StringSwitch<const HighlightStyle::ColorStyle *>(capture_name)
+      .Case("comment", &options.comment)
+      .Case("keyword", &options.keyword)
+      .Case("operator", &options.operators)
+      .Case("type", &options.keyword)
+      .Case("punctuation.delimiter.comma", &options.comma)
+      .Case("punctuation.delimiter.colon", &options.colon)
+      .Case("punctuation.delimiter.semicolon", &options.semicolons)
+      .Case("punctuation.bracket.square", &options.square_brackets)
+      .Cases({"keyword.directive", "preproc"}, &options.pp_directive)
+      .Cases({"string", "string.literal"}, &options.string_literal)
+      .Cases({"number", "number.literal", "constant.numeric"},
+             &options.scalar_literal)
+      .Cases({"identifier", "variable", "function"}, &options.identifier)
+      .Cases({"punctuation.bracket.curly", "punctuation.brace"},
+             &options.braces)
+      .Cases({"punctuation.bracket.round", "punctuation.bracket",
+              "punctuation.paren"},
+             &options.parentheses)
+      .Default(nullptr);
+}
+
+void TreeSitterHighlighter::HighlightRange(
+    const HighlightStyle &options, llvm::StringRef text, uint32_t start_byte,
+    uint32_t end_byte, const HighlightStyle::ColorStyle *style,
+    std::optional<size_t> cursor_pos, bool &highlighted_cursor,
+    Stream &s) const {
+
+  if (start_byte >= end_byte || start_byte >= text.size())
+    return;
+
+  end_byte = std::min(end_byte, static_cast<uint32_t>(text.size()));
+
+  llvm::StringRef range = text.substr(start_byte, end_byte - start_byte);
+
+  auto print = [&](llvm::StringRef str) {
+    if (style)
+      style->Apply(s, str);
+    else
+      s << str;
+  };
+
+  // Check if cursor is within this range.
+  if (cursor_pos && *cursor_pos >= start_byte && *cursor_pos < end_byte &&
+      !highlighted_cursor) {
+    highlighted_cursor = true;
+
+    // Split range around cursor position.
+    const size_t cursor_in_range = *cursor_pos - start_byte;
+
+    // Print everything before the cursor.
+    if (cursor_in_range > 0) {
+      llvm::StringRef before = range.substr(0, cursor_in_range);
+      print(before);
+    }
+
+    // Print the cursor itself.
+    if (cursor_in_range < range.size()) {
+      StreamString cursor_str;
+      llvm::StringRef cursor_char = range.substr(cursor_in_range, 1);
+      if (style)
+        style->Apply(cursor_str, cursor_char);
+      else
+        cursor_str << cursor_char;
+      options.selected.Apply(s, cursor_str.GetString());
+    }
+
+    // Print everything after the cursor.
+    if (cursor_in_range + 1 < range.size()) {
+      llvm::StringRef after = range.substr(cursor_in_range + 1);
+      print(after);
+    }
+  } else {
+    // No cursor in this range, apply style directly.
+    print(range);
+  }
+}
+
+void TreeSitterHighlighter::Highlight(const HighlightStyle &options,
+                                      llvm::StringRef line,
+                                      std::optional<size_t> cursor_pos,
+                                      llvm::StringRef previous_lines,
+                                      Stream &s) const {
+  auto unformatted = [&]() -> void { s << line; };
+
+  TSState &ts_state = GetTSState();
+  if (!ts_state)
+    return unformatted();
+
+  std::string source = previous_lines.str() + line.str();
+  TSTree *tree =
+      ts_parser_parse_string(ts_state.parser, nullptr, source.c_str(),
+                             static_cast<uint32_t>(source.size()));
+  if (!tree)
+    return unformatted();
+
+  TSQueryCursor *cursor = ts_query_cursor_new();
+  assert(cursor);
+
+  llvm::scope_exit delete_cusor([&] { ts_query_cursor_delete(cursor); });
+
+  TSNode root_node = ts_tree_root_node(tree);
+  ts_query_cursor_exec(cursor, ts_state.query, root_node);
+
+  // Collect all matches and their byte ranges.
+  std::vector<HLRange> highlights;
+  TSQueryMatch match;
+  uint32_t capture_index;
+  while (ts_query_cursor_next_capture(cursor, &match, &capture_index)) {
+    TSQueryCapture capture = match.captures[capture_index];
+
+    uint32_t capture_name_len = 0;
+    const char *capture_name = ts_query_capture_name_for_id(
+        ts_state.query, capture.index, &capture_name_len);
+
+    const HighlightStyle::ColorStyle *style = GetStyleForCapture(
+        llvm::StringRef(capture_name, capture_name_len), options);
+
+    TSNode node = capture.node;
+    uint32_t start = ts_node_start_byte(node);
+    uint32_t end = ts_node_end_byte(node);
+
+    if (style && start < end)
+      highlights.push_back({start, end, style});
+  }
+
+  std::sort(highlights.begin(), highlights.end(),
+            [](const HLRange &a, const HLRange &b) {
+              if (a.start_byte != b.start_byte)
+                return a.start_byte < b.start_byte;
+              // Prefer longer matches.
+              return (a.end_byte - a.start_byte) > (b.end_byte - b.start_byte);
+            });
+
+  uint32_t current_pos = 0;
+  bool highlighted_cursor = false;
+
+  for (const auto &h : highlights) {
+    // Skip over highlights that start before our current position, which means
+    // there's overlap.
+    if (h.start_byte < current_pos)
+      continue;
+
+    // Output any unhighlighted text before this highlight.
+    if (current_pos < h.start_byte) {
+      HighlightRange(options, line, current_pos, h.start_byte, nullptr,
+                     cursor_pos, highlighted_cursor, s);
+      current_pos = h.start_byte;
+    }
+
+    // Output the highlighted range.
+    HighlightRange(options, line, h.start_byte, h.end_byte, h.style, cursor_pos,
+                   highlighted_cursor, s);
+    current_pos = h.end_byte;
+  }
+
+  // Output any remaining unhighlighted text.
+  if (current_pos < line.size()) {
+    HighlightRange(options, line, current_pos,
+                   static_cast<uint32_t>(line.size()), nullptr, cursor_pos,
+                   highlighted_cursor, s);
+  }
+}
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h
new file mode 100644
index 0000000000000..38530400b2b1d
--- /dev/null
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h
@@ -0,0 +1,77 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_LANGUAGE_TREESITTERCOMMON_TREESITTERHIGHLIGHTER_H
+#define LLDB_SOURCE_PLUGINS_LANGUAGE_TREESITTERCOMMON_TREESITTERHIGHLIGHTER_H
+
+#include "lldb/Core/Highlighter.h"
+#include "lldb/Utility/Stream.h"
+#include "llvm/ADT/StringRef.h"
+
+#include <optional>
+#include <tree_sitter/api.h>
+
+namespace lldb_private {
+
+struct TSState;
+
+class TreeSitterHighlighter : public Highlighter {
+public:
+  TreeSitterHighlighter() = default;
+  ~TreeSitterHighlighter() override = default;
+
+  /// Highlights a single line of code using tree-sitter parsing.
+  void Highlight(const HighlightStyle &options, llvm::StringRef line,
+                 std::optional<size_t> cursor_pos,
+                 llvm::StringRef previous_lines, Stream &s) const override;
+
+protected:
+  /// Returns the tree-sitter language for this highlighter.
+  virtual const TSLanguage *GetLanguage() const = 0;
+
+  /// Returns the tree-sitter highlight query for this language.
+  virtual llvm::StringRef GetHighlightQuery() const = 0;
+
+private:
+  /// Maps a tree-sitter capture name to a HighlightStyle color.
+  const HighlightStyle::ColorStyle *
+  GetStyleForCapture(llvm::StringRef capture_name,
+                     const HighlightStyle &options) const;
+
+  /// Applies syntax highlighting to a range of text.
+  void HighlightRange(const HighlightStyle &options, llvm::StringRef text,
+                      uint32_t start_byte, uint32_t end_byte,
+                      const HighlightStyle::ColorStyle *style,
+                      std::optional<size_t> cursor_pos,
+                      bool &highlighted_cursor, Stream &s) const;
+
+  struct HLRange {
+    uint32_t start_byte;
+    uint32_t end_byte;
+    const HighlightStyle::ColorStyle *style;
+  };
+
+  struct TSState {
+    TSState() = default;
+    TSState &operator=(const TSState &) = delete;
+    TSState(const TSState &) = delete;
+    ~TSState();
+
+    explicit operator bool() const;
+    TSParser *parser = nullptr;
+    TSQuery *query = nullptr;
+  };
+
+  /// Lazily creates a tree-sitter state (TSState).
+  TSState &GetTSState() const;
+  mutable std::optional<TSState> m_ts_state;
+};
+
+} // namespace lldb_private
+
+#endif // LLDB_SOURCE_PLUGINS_LANGUAGE_TREESITTERCOMMON_TREESITTERHIGHLIGHTER_H

>From d766461d72346dcc9f2a8ad0e32186242286bd51 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <[email protected]>
Date: Thu, 12 Feb 2026 16:45:45 -0800
Subject: [PATCH 2/4] Fix bogus component

---
 lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
index 99c03da999720..f85595d3b574c 100644
--- a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
@@ -3,7 +3,6 @@ add_lldb_library(lldbTreeSitter
 
   LINK_COMPONENTS
     Support
-    ADT
   LINK_LIBS
     lldbUtility
     ${TreeSitter_LIBRARY}

>From 0d98d2e8c04aa3a1cf81f9dcd3a104ec8b4ca1be Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <[email protected]>
Date: Thu, 12 Feb 2026 17:23:35 -0800
Subject: [PATCH 3/4] Address Alex' feedback

---
 lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt     | 1 +
 .../Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h    | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
index f85595d3b574c..af942ab39c569 100644
--- a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt
@@ -4,6 +4,7 @@ add_lldb_library(lldbTreeSitter
   LINK_COMPONENTS
     Support
   LINK_LIBS
+    lldbCore
     lldbUtility
     ${TreeSitter_LIBRARY}
 )
diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h
index 38530400b2b1d..afafbaec92f71 100644
--- a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLDB_SOURCE_PLUGINS_LANGUAGE_TREESITTERCOMMON_TREESITTERHIGHLIGHTER_H
-#define LLDB_SOURCE_PLUGINS_LANGUAGE_TREESITTERCOMMON_TREESITTERHIGHLIGHTER_H
+#ifndef LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_TREESITTERHIGHLIGHTER_H
+#define LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_TREESITTERHIGHLIGHTER_H
 
 #include "lldb/Core/Highlighter.h"
 #include "lldb/Utility/Stream.h"

>From 3819666fec679cfec19f8ac694514d02171067df Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <[email protected]>
Date: Fri, 13 Feb 2026 10:57:17 -0800
Subject: [PATCH 4/4] Prefer shorter matches as suggested by Charles

---
 .../Highlighter/TreeSitter/TreeSitterHighlighter.cpp        | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp
index 181dc90683597..a109471a313ca 100644
--- a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp
+++ b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp
@@ -196,8 +196,10 @@ void TreeSitterHighlighter::Highlight(const HighlightStyle &options,
             [](const HLRange &a, const HLRange &b) {
               if (a.start_byte != b.start_byte)
                 return a.start_byte < b.start_byte;
-              // Prefer longer matches.
-              return (a.end_byte - a.start_byte) > (b.end_byte - b.start_byte);
+              // Prefer shorter matches. For example, if we have an expression
+              // consisting of a variable and a property, we want to highlight
+              // them as individual components.
+              return (b.end_byte - b.start_byte) > (a.end_byte - a.start_byte);
             });
 
   uint32_t current_pos = 0;

_______________________________________________
lldb-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to