https://github.com/JDevlieghere updated https://github.com/llvm/llvm-project/pull/181279
>From 3a08e2aa927886dbd7b4d133a0b6a376d69d3094 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere <[email protected]> Date: Thu, 12 Feb 2026 16:23:00 -0800 Subject: [PATCH 1/4] [lldb] Add tree-sitter based syntax highlighting This adds the necessary infrastructure to use tree-sitter for syntax highlighting in LLDB. It provides the base class for a tree-sitter highlighter plugin. Its primary function is interfacing with the tree-sitter library, and converting captures to highlighting styles. Adding a new tree-sitter highlighter consists of creating an LLDB plugin that inherits from this class. The plugin has two core responsibilities: 1. Loading the tree-sitter grammar. 2. Specifying the tree-sitter syntax highlighting query. Everything else is handled by the base class, making it extremely easy to add a new language. For more context and the motivation behind using tree-sitter for syntax highlighting, see #170250. --- lldb/cmake/modules/FindTreeSitter.cmake | 18 ++ lldb/cmake/modules/LLDBConfig.cmake | 1 + .../source/Plugins/Highlighter/CMakeLists.txt | 3 + .../Highlighter/TreeSitter/CMakeLists.txt | 10 + .../TreeSitter/TreeSitterHighlighter.cpp | 231 ++++++++++++++++++ .../TreeSitter/TreeSitterHighlighter.h | 77 ++++++ 6 files changed, 340 insertions(+) create mode 100644 lldb/cmake/modules/FindTreeSitter.cmake create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp create mode 100644 lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h diff --git a/lldb/cmake/modules/FindTreeSitter.cmake b/lldb/cmake/modules/FindTreeSitter.cmake new file mode 100644 index 0000000000000..04a40507cc9ab --- /dev/null +++ b/lldb/cmake/modules/FindTreeSitter.cmake @@ -0,0 +1,18 @@ +# FindTreeSitter.cmake + +include(FindPackageHandleStandardArgs) + +find_path(TreeSitter_INCLUDE_DIR + NAMES tree_sitter/api.h) + +find_library(TreeSitter_LIBRARY + NAMES tree-sitter treesitter) + +find_package_handle_standard_args(TreeSitter + REQUIRED_VARS TreeSitter_LIBRARY TreeSitter_INCLUDE_DIR +) + +mark_as_advanced( + TreeSitter_INCLUDE_DIR + TreeSitter_LIBRARY +) diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index d4471b8a5418d..f00824d692fe1 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -64,6 +64,7 @@ add_optional_dependency(LLDB_ENABLE_LUA "Enable Lua scripting support in LLDB" L add_optional_dependency(LLDB_ENABLE_PYTHON "Enable Python scripting support in LLDB" PythonAndSwig PYTHONANDSWIG_FOUND) add_optional_dependency(LLDB_ENABLE_LIBXML2 "Enable Libxml 2 support in LLDB" LibXml2 LIBXML2_FOUND VERSION ${LLDB_LIBXML2_VERSION}) add_optional_dependency(LLDB_ENABLE_FBSDVMCORE "Enable libfbsdvmcore support in LLDB" FBSDVMCore FBSDVMCore_FOUND QUIET) +add_optional_dependency(LLDB_ENABLE_TREESITTER "Enable Tree-sitter syntax highlighting" TreeSitter TREESITTER_FOUND) option(LLDB_USE_ENTITLEMENTS "When codesigning, use entitlements if available" ON) option(LLDB_BUILD_FRAMEWORK "Build LLDB.framework (Darwin only)" OFF) diff --git a/lldb/source/Plugins/Highlighter/CMakeLists.txt b/lldb/source/Plugins/Highlighter/CMakeLists.txt index 88033867b1a12..704ea5f145794 100644 --- a/lldb/source/Plugins/Highlighter/CMakeLists.txt +++ b/lldb/source/Plugins/Highlighter/CMakeLists.txt @@ -1,2 +1,5 @@ +if (LLDB_ENABLE_TREESITTER) + add_subdirectory(TreeSitter) +endif() add_subdirectory(Clang) add_subdirectory(Default) diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt new file mode 100644 index 0000000000000..99c03da999720 --- /dev/null +++ b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt @@ -0,0 +1,10 @@ +add_lldb_library(lldbTreeSitter + TreeSitterHighlighter.cpp + + LINK_COMPONENTS + Support + ADT + LINK_LIBS + lldbUtility + ${TreeSitter_LIBRARY} +) diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp new file mode 100644 index 0000000000000..181dc90683597 --- /dev/null +++ b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp @@ -0,0 +1,231 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TreeSitterHighlighter.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" +#include "lldb/Utility/StreamString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" + +using namespace lldb_private; + +TreeSitterHighlighter::TSState::~TSState() { + if (query) + ts_query_delete(query); + if (parser) + ts_parser_delete(parser); +} + +TreeSitterHighlighter::TSState::operator bool() const { + return parser && query; +} + +TreeSitterHighlighter::TSState &TreeSitterHighlighter::GetTSState() const { + if (m_ts_state) + return *m_ts_state; + + Log *log = GetLog(LLDBLog::Source); + + m_ts_state.emplace(); + m_ts_state->parser = ts_parser_new(); + if (!m_ts_state->parser) { + LLDB_LOG(log, "Creating tree-sitter parser failed for {0}", GetName()); + return *m_ts_state; + } + + const TSLanguage *language = GetLanguage(); + if (!language || !ts_parser_set_language(m_ts_state->parser, language)) { + LLDB_LOG(log, "Creating tree-sitter language failed for {0}", GetName()); + return *m_ts_state; + } + + llvm::StringRef query_source = GetHighlightQuery(); + uint32_t error_offset = 0; + TSQueryError error_type = TSQueryErrorNone; + m_ts_state->query = ts_query_new(language, query_source.data(), + static_cast<uint32_t>(query_source.size()), + &error_offset, &error_type); + if (!m_ts_state->query || error_type != TSQueryErrorNone) { + LLDB_LOG(log, + "Creating tree-sitter query failed for {0} with error {1}: {2}", + GetName(), error_type, query_source.substr(error_offset, 64)); + // If we have an error but a valid query, we need to reset the object to + // (1) avoid it looking valid and (2) release the parser. + m_ts_state.emplace(); + } + + return *m_ts_state; +} + +const HighlightStyle::ColorStyle * +TreeSitterHighlighter::GetStyleForCapture(llvm::StringRef capture_name, + const HighlightStyle &options) const { + return llvm::StringSwitch<const HighlightStyle::ColorStyle *>(capture_name) + .Case("comment", &options.comment) + .Case("keyword", &options.keyword) + .Case("operator", &options.operators) + .Case("type", &options.keyword) + .Case("punctuation.delimiter.comma", &options.comma) + .Case("punctuation.delimiter.colon", &options.colon) + .Case("punctuation.delimiter.semicolon", &options.semicolons) + .Case("punctuation.bracket.square", &options.square_brackets) + .Cases({"keyword.directive", "preproc"}, &options.pp_directive) + .Cases({"string", "string.literal"}, &options.string_literal) + .Cases({"number", "number.literal", "constant.numeric"}, + &options.scalar_literal) + .Cases({"identifier", "variable", "function"}, &options.identifier) + .Cases({"punctuation.bracket.curly", "punctuation.brace"}, + &options.braces) + .Cases({"punctuation.bracket.round", "punctuation.bracket", + "punctuation.paren"}, + &options.parentheses) + .Default(nullptr); +} + +void TreeSitterHighlighter::HighlightRange( + const HighlightStyle &options, llvm::StringRef text, uint32_t start_byte, + uint32_t end_byte, const HighlightStyle::ColorStyle *style, + std::optional<size_t> cursor_pos, bool &highlighted_cursor, + Stream &s) const { + + if (start_byte >= end_byte || start_byte >= text.size()) + return; + + end_byte = std::min(end_byte, static_cast<uint32_t>(text.size())); + + llvm::StringRef range = text.substr(start_byte, end_byte - start_byte); + + auto print = [&](llvm::StringRef str) { + if (style) + style->Apply(s, str); + else + s << str; + }; + + // Check if cursor is within this range. + if (cursor_pos && *cursor_pos >= start_byte && *cursor_pos < end_byte && + !highlighted_cursor) { + highlighted_cursor = true; + + // Split range around cursor position. + const size_t cursor_in_range = *cursor_pos - start_byte; + + // Print everything before the cursor. + if (cursor_in_range > 0) { + llvm::StringRef before = range.substr(0, cursor_in_range); + print(before); + } + + // Print the cursor itself. + if (cursor_in_range < range.size()) { + StreamString cursor_str; + llvm::StringRef cursor_char = range.substr(cursor_in_range, 1); + if (style) + style->Apply(cursor_str, cursor_char); + else + cursor_str << cursor_char; + options.selected.Apply(s, cursor_str.GetString()); + } + + // Print everything after the cursor. + if (cursor_in_range + 1 < range.size()) { + llvm::StringRef after = range.substr(cursor_in_range + 1); + print(after); + } + } else { + // No cursor in this range, apply style directly. + print(range); + } +} + +void TreeSitterHighlighter::Highlight(const HighlightStyle &options, + llvm::StringRef line, + std::optional<size_t> cursor_pos, + llvm::StringRef previous_lines, + Stream &s) const { + auto unformatted = [&]() -> void { s << line; }; + + TSState &ts_state = GetTSState(); + if (!ts_state) + return unformatted(); + + std::string source = previous_lines.str() + line.str(); + TSTree *tree = + ts_parser_parse_string(ts_state.parser, nullptr, source.c_str(), + static_cast<uint32_t>(source.size())); + if (!tree) + return unformatted(); + + TSQueryCursor *cursor = ts_query_cursor_new(); + assert(cursor); + + llvm::scope_exit delete_cusor([&] { ts_query_cursor_delete(cursor); }); + + TSNode root_node = ts_tree_root_node(tree); + ts_query_cursor_exec(cursor, ts_state.query, root_node); + + // Collect all matches and their byte ranges. + std::vector<HLRange> highlights; + TSQueryMatch match; + uint32_t capture_index; + while (ts_query_cursor_next_capture(cursor, &match, &capture_index)) { + TSQueryCapture capture = match.captures[capture_index]; + + uint32_t capture_name_len = 0; + const char *capture_name = ts_query_capture_name_for_id( + ts_state.query, capture.index, &capture_name_len); + + const HighlightStyle::ColorStyle *style = GetStyleForCapture( + llvm::StringRef(capture_name, capture_name_len), options); + + TSNode node = capture.node; + uint32_t start = ts_node_start_byte(node); + uint32_t end = ts_node_end_byte(node); + + if (style && start < end) + highlights.push_back({start, end, style}); + } + + std::sort(highlights.begin(), highlights.end(), + [](const HLRange &a, const HLRange &b) { + if (a.start_byte != b.start_byte) + return a.start_byte < b.start_byte; + // Prefer longer matches. + return (a.end_byte - a.start_byte) > (b.end_byte - b.start_byte); + }); + + uint32_t current_pos = 0; + bool highlighted_cursor = false; + + for (const auto &h : highlights) { + // Skip over highlights that start before our current position, which means + // there's overlap. + if (h.start_byte < current_pos) + continue; + + // Output any unhighlighted text before this highlight. + if (current_pos < h.start_byte) { + HighlightRange(options, line, current_pos, h.start_byte, nullptr, + cursor_pos, highlighted_cursor, s); + current_pos = h.start_byte; + } + + // Output the highlighted range. + HighlightRange(options, line, h.start_byte, h.end_byte, h.style, cursor_pos, + highlighted_cursor, s); + current_pos = h.end_byte; + } + + // Output any remaining unhighlighted text. + if (current_pos < line.size()) { + HighlightRange(options, line, current_pos, + static_cast<uint32_t>(line.size()), nullptr, cursor_pos, + highlighted_cursor, s); + } +} diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h new file mode 100644 index 0000000000000..38530400b2b1d --- /dev/null +++ b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h @@ -0,0 +1,77 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_LANGUAGE_TREESITTERCOMMON_TREESITTERHIGHLIGHTER_H +#define LLDB_SOURCE_PLUGINS_LANGUAGE_TREESITTERCOMMON_TREESITTERHIGHLIGHTER_H + +#include "lldb/Core/Highlighter.h" +#include "lldb/Utility/Stream.h" +#include "llvm/ADT/StringRef.h" + +#include <optional> +#include <tree_sitter/api.h> + +namespace lldb_private { + +struct TSState; + +class TreeSitterHighlighter : public Highlighter { +public: + TreeSitterHighlighter() = default; + ~TreeSitterHighlighter() override = default; + + /// Highlights a single line of code using tree-sitter parsing. + void Highlight(const HighlightStyle &options, llvm::StringRef line, + std::optional<size_t> cursor_pos, + llvm::StringRef previous_lines, Stream &s) const override; + +protected: + /// Returns the tree-sitter language for this highlighter. + virtual const TSLanguage *GetLanguage() const = 0; + + /// Returns the tree-sitter highlight query for this language. + virtual llvm::StringRef GetHighlightQuery() const = 0; + +private: + /// Maps a tree-sitter capture name to a HighlightStyle color. + const HighlightStyle::ColorStyle * + GetStyleForCapture(llvm::StringRef capture_name, + const HighlightStyle &options) const; + + /// Applies syntax highlighting to a range of text. + void HighlightRange(const HighlightStyle &options, llvm::StringRef text, + uint32_t start_byte, uint32_t end_byte, + const HighlightStyle::ColorStyle *style, + std::optional<size_t> cursor_pos, + bool &highlighted_cursor, Stream &s) const; + + struct HLRange { + uint32_t start_byte; + uint32_t end_byte; + const HighlightStyle::ColorStyle *style; + }; + + struct TSState { + TSState() = default; + TSState &operator=(const TSState &) = delete; + TSState(const TSState &) = delete; + ~TSState(); + + explicit operator bool() const; + TSParser *parser = nullptr; + TSQuery *query = nullptr; + }; + + /// Lazily creates a tree-sitter state (TSState). + TSState &GetTSState() const; + mutable std::optional<TSState> m_ts_state; +}; + +} // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_LANGUAGE_TREESITTERCOMMON_TREESITTERHIGHLIGHTER_H >From d766461d72346dcc9f2a8ad0e32186242286bd51 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere <[email protected]> Date: Thu, 12 Feb 2026 16:45:45 -0800 Subject: [PATCH 2/4] Fix bogus component --- lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt index 99c03da999720..f85595d3b574c 100644 --- a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt +++ b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt @@ -3,7 +3,6 @@ add_lldb_library(lldbTreeSitter LINK_COMPONENTS Support - ADT LINK_LIBS lldbUtility ${TreeSitter_LIBRARY} >From 0d98d2e8c04aa3a1cf81f9dcd3a104ec8b4ca1be Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere <[email protected]> Date: Thu, 12 Feb 2026 17:23:35 -0800 Subject: [PATCH 3/4] Address Alex' feedback --- lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt | 1 + .../Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt index f85595d3b574c..af942ab39c569 100644 --- a/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt +++ b/lldb/source/Plugins/Highlighter/TreeSitter/CMakeLists.txt @@ -4,6 +4,7 @@ add_lldb_library(lldbTreeSitter LINK_COMPONENTS Support LINK_LIBS + lldbCore lldbUtility ${TreeSitter_LIBRARY} ) diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h index 38530400b2b1d..afafbaec92f71 100644 --- a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h +++ b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLDB_SOURCE_PLUGINS_LANGUAGE_TREESITTERCOMMON_TREESITTERHIGHLIGHTER_H -#define LLDB_SOURCE_PLUGINS_LANGUAGE_TREESITTERCOMMON_TREESITTERHIGHLIGHTER_H +#ifndef LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_TREESITTERHIGHLIGHTER_H +#define LLDB_SOURCE_PLUGINS_HIGHLIGHTER_TREESITTER_TREESITTERHIGHLIGHTER_H #include "lldb/Core/Highlighter.h" #include "lldb/Utility/Stream.h" >From 3819666fec679cfec19f8ac694514d02171067df Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere <[email protected]> Date: Fri, 13 Feb 2026 10:57:17 -0800 Subject: [PATCH 4/4] Prefer shorter matches as suggested by Charles --- .../Highlighter/TreeSitter/TreeSitterHighlighter.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp index 181dc90683597..a109471a313ca 100644 --- a/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp +++ b/lldb/source/Plugins/Highlighter/TreeSitter/TreeSitterHighlighter.cpp @@ -196,8 +196,10 @@ void TreeSitterHighlighter::Highlight(const HighlightStyle &options, [](const HLRange &a, const HLRange &b) { if (a.start_byte != b.start_byte) return a.start_byte < b.start_byte; - // Prefer longer matches. - return (a.end_byte - a.start_byte) > (b.end_byte - b.start_byte); + // Prefer shorter matches. For example, if we have an expression + // consisting of a variable and a property, we want to highlight + // them as individual components. + return (b.end_byte - b.start_byte) > (a.end_byte - a.start_byte); }); uint32_t current_pos = 0; _______________________________________________ lldb-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits
