https://github.com/adrian-prantl updated https://github.com/llvm/llvm-project/pull/114333
>From c9a4d24f222a70c7c108deebb6c25222893d7159 Mon Sep 17 00:00:00 2001 From: Dave Lee <davelee....@gmail.com> Date: Wed, 24 Jan 2024 12:42:45 -0800 Subject: [PATCH 1/2] [lldb] Load embedded type summary section (#7859) (#8040) Add support for type summaries embedded into the binary. These embedded summaries will typically be generated by Swift macros, but can also be generated by any other means. rdar://115184658 --- lldb/include/lldb/lldb-enumerations.h | 1 + lldb/source/Core/Section.cpp | 3 + .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp | 1 + .../ObjectFile/Mach-O/ObjectFileMachO.cpp | 4 ++ .../ObjectFile/PECOFF/ObjectFilePECOFF.cpp | 1 + lldb/source/Symbol/ObjectFile.cpp | 1 + lldb/source/Target/Target.cpp | 72 +++++++++++++++++++ .../data-formatter/embedded-summary/Makefile | 2 + .../TestEmbeddedTypeSummary.py | 12 ++++ .../data-formatter/embedded-summary/main.c | 22 ++++++ 10 files changed, 119 insertions(+) create mode 100644 lldb/test/API/functionalities/data-formatter/embedded-summary/Makefile create mode 100644 lldb/test/API/functionalities/data-formatter/embedded-summary/TestEmbeddedTypeSummary.py create mode 100644 lldb/test/API/functionalities/data-formatter/embedded-summary/main.c diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index 938f6e3abe8f2a..1ca4aa62218c09 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -761,6 +761,7 @@ enum SectionType { eSectionTypeDWARFDebugLocListsDwo, eSectionTypeDWARFDebugTuIndex, eSectionTypeCTF, + eSectionTypeLLDBTypeSummaries, eSectionTypeSwiftModules, }; diff --git a/lldb/source/Core/Section.cpp b/lldb/source/Core/Section.cpp index 0763e88d4608f4..ee01b4ce06ca1e 100644 --- a/lldb/source/Core/Section.cpp +++ b/lldb/source/Core/Section.cpp @@ -147,6 +147,8 @@ const char *Section::GetTypeAsCString() const { return "dwarf-gnu-debugaltlink"; case eSectionTypeCTF: return "ctf"; + case eSectionTypeLLDBTypeSummaries: + return 
"lldb-type-summaries"; case eSectionTypeOther: return "regular"; case eSectionTypeSwiftModules: @@ -457,6 +459,7 @@ bool Section::ContainsOnlyDebugInfo() const { case eSectionTypeDWARFAppleObjC: case eSectionTypeDWARFGNUDebugAltLink: case eSectionTypeCTF: + case eSectionTypeLLDBTypeSummaries: case eSectionTypeSwiftModules: return true; } diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index 10d09662c0a47a..ad4a84ef02bf72 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -1678,6 +1678,7 @@ static SectionType GetSectionTypeFromName(llvm::StringRef Name) { .Case(".gnu_debugaltlink", eSectionTypeDWARFGNUDebugAltLink) .Case(".gosymtab", eSectionTypeGoSymtab) .Case(".text", eSectionTypeCode) + .Case(".lldbsummaries", lldb::eSectionTypeLLDBTypeSummaries) .Case(".swift_ast", eSectionTypeSwiftModules) .Default(eSectionTypeOther); } diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index b542e237f023d4..d6bec5d84aea19 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -1209,6 +1209,7 @@ AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) { case eSectionTypeDWARFAppleObjC: case eSectionTypeDWARFGNUDebugAltLink: case eSectionTypeCTF: + case eSectionTypeLLDBTypeSummaries: case eSectionTypeSwiftModules: return AddressClass::eDebug; @@ -1484,6 +1485,7 @@ static lldb::SectionType GetSectionType(uint32_t flags, static ConstString g_sect_name_data("__data"); static ConstString g_sect_name_go_symtab("__gosymtab"); static ConstString g_sect_name_ctf("__ctf"); + static ConstString g_sect_name_lldb_summaries("__lldbsummaries"); static ConstString g_sect_name_swift_ast("__swift_ast"); if (section_name == g_sect_name_dwarf_debug_abbrev) @@ -1564,6 +1566,8 @@ 
static lldb::SectionType GetSectionType(uint32_t flags, return eSectionTypeGoSymtab; if (section_name == g_sect_name_ctf) return eSectionTypeCTF; + if (section_name == g_sect_name_lldb_summaries) + return lldb::eSectionTypeLLDBTypeSummaries; if (section_name == g_sect_name_swift_ast) return eSectionTypeSwiftModules; if (section_name == g_sect_name_objc_data || diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp index 8d9c919bc9b101..bb712da7f6d67d 100644 --- a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp +++ b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp @@ -1010,6 +1010,7 @@ SectionType ObjectFilePECOFF::GetSectionType(llvm::StringRef sect_name, // .eh_frame can be truncated to 8 chars. .Cases(".eh_frame", ".eh_fram", eSectionTypeEHFrame) .Case(".gosymtab", eSectionTypeGoSymtab) + .Case(".lldbsummaries", lldb::eSectionTypeLLDBTypeSummaries) .Case("swiftast", eSectionTypeSwiftModules) .Default(eSectionTypeInvalid); if (section_type != eSectionTypeInvalid) diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp index 35317d209de1f9..3100e6b813b631 100644 --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -366,6 +366,7 @@ AddressClass ObjectFile::GetAddressClass(addr_t file_addr) { case eSectionTypeDWARFAppleObjC: case eSectionTypeDWARFGNUDebugAltLink: case eSectionTypeCTF: + case eSectionTypeLLDBTypeSummaries: case eSectionTypeSwiftModules: return AddressClass::eDebug; case eSectionTypeEHFrame: diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 04395e37f0425d..37670b108d9631 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -26,6 +26,7 @@ #include "lldb/Core/StructuredDataImpl.h" #include "lldb/Core/ValueObject.h" #include "lldb/Core/ValueObjectConstResult.h" +#include "lldb/DataFormatters/DataVisualization.h" #include 
"lldb/Expression/DiagnosticManager.h" #include "lldb/Expression/ExpressionVariable.h" #include "lldb/Expression/REPL.h" @@ -1537,6 +1538,76 @@ static void LoadScriptingResourceForModule(const ModuleSP &module_sp, feedback_stream.GetData()); } +// Load type summaries embedded in the binary. These are type summaries provided +// by the authors of the code. +static void LoadTypeSummariesForModule(ModuleSP module_sp) { + auto *sections = module_sp->GetSectionList(); + if (!sections) + return; + + auto summaries_sp = + sections->FindSectionByType(eSectionTypeLLDBTypeSummaries, true); + if (!summaries_sp) + return; + + Log *log = GetLog(LLDBLog::DataFormatters); + const char *module_name = module_sp->GetObjectName().GetCString(); + + TypeCategoryImplSP category; + DataVisualization::Categories::GetCategory(ConstString("default"), category); + + // The type summary record is serialized as follows. + // + // Each record contains, in order: + // * Version number of the record format + // * The remaining size of the record + // * The size of the type identifier + // * The type identifier, either a type name, or a regex + // * The size of the summary string + // * The summary string + // + // Integers are encoded using ULEB. + // + // Strings are encoded with first a length (ULEB), then the string contents, + // and lastly a null terminator. The length includes the null. 
+ + DataExtractor extractor; + auto section_size = summaries_sp->GetSectionData(extractor); + lldb::offset_t offset = 0; + while (offset < section_size) { + uint64_t version = extractor.GetULEB128(&offset); + uint64_t record_size = extractor.GetULEB128(&offset); + if (version == 1) { + uint64_t type_size = extractor.GetULEB128(&offset); + llvm::StringRef type_name = extractor.GetCStr(&offset, type_size); + uint64_t summary_size = extractor.GetULEB128(&offset); + llvm::StringRef summary_string = extractor.GetCStr(&offset, summary_size); + if (!type_name.empty() && !summary_string.empty()) { + TypeSummaryImpl::Flags flags; + auto summary_sp = + std::make_shared<StringSummaryFormat>(flags, summary_string.data()); + FormatterMatchType match_type = eFormatterMatchExact; + if (summary_string.front() == '^' && summary_string.back() == '$') + match_type = eFormatterMatchRegex; + category->AddTypeSummary(type_name, match_type, summary_sp); + LLDB_LOGF(log, "Loaded embedded type summary for '%s' from %s.", + type_name.data(), module_name); + } else { + if (type_name.empty()) + LLDB_LOGF(log, "Missing string(s) in embedded type summary in %s.", + module_name); + } + } else { + // Skip unsupported record. 
+ offset += record_size; + LLDB_LOGF( + log, + "Skipping unsupported embedded type summary of version %llu in %s.", + version, module_name); + } + } +} + void Target::ClearModules(bool delete_locations) { ModulesDidUnload(m_images, delete_locations); m_section_load_history.Clear(); @@ -1775,6 +1846,7 @@ void Target::ModulesDidLoad(ModuleList &module_list) { for (size_t idx = 0; idx < num_images; ++idx) { ModuleSP module_sp(module_list.GetModuleAtIndex(idx)); LoadScriptingResourceForModule(module_sp, this); + LoadTypeSummariesForModule(module_sp); } m_breakpoint_list.UpdateBreakpoints(module_list, true, false); m_internal_breakpoint_list.UpdateBreakpoints(module_list, true, false); diff --git a/lldb/test/API/functionalities/data-formatter/embedded-summary/Makefile b/lldb/test/API/functionalities/data-formatter/embedded-summary/Makefile new file mode 100644 index 00000000000000..c9319d6e6888a4 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/embedded-summary/Makefile @@ -0,0 +1,2 @@ +C_SOURCES := main.c +include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/embedded-summary/TestEmbeddedTypeSummary.py b/lldb/test/API/functionalities/data-formatter/embedded-summary/TestEmbeddedTypeSummary.py new file mode 100644 index 00000000000000..b8ce7d9f76eb9e --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/embedded-summary/TestEmbeddedTypeSummary.py @@ -0,0 +1,12 @@ +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestCase(TestBase): + @skipUnlessDarwin + def test(self): + self.build() + lldbutil.run_to_source_breakpoint(self, "break here", lldb.SBFileSpec("main.c")) + self.expect("v player", substrs=['"Dirk" (41)']) diff --git a/lldb/test/API/functionalities/data-formatter/embedded-summary/main.c b/lldb/test/API/functionalities/data-formatter/embedded-summary/main.c new file mode 100644 index 00000000000000..9ddd64246f726c --- 
/dev/null +++ b/lldb/test/API/functionalities/data-formatter/embedded-summary/main.c @@ -0,0 +1,22 @@ +#include <stdio.h> + +struct Player { + char *name; + int number; +}; + +__attribute__((used, section("__DATA_CONST,__lldbsummaries"))) unsigned char + _Player_type_summary[] = "\x01" // version + "\x25" // record size + "\x07" // type name size + "Player\0" // type name + "\x1c" // summary string size + "${var.name} (${var.number})"; // summary string + +int main() { + struct Player player; + player.name = "Dirk"; + player.number = 41; + puts("break here"); + return 0; +} >From cf8bb17feeeb9ff5b15b22177a72c88306626707 Mon Sep 17 00:00:00 2001 From: Adrian Prantl <apra...@apple.com> Date: Mon, 28 Oct 2024 17:18:34 -0700 Subject: [PATCH 2/2] [lldb] Implement a formatter bytecode interpreter in C++ Compared to the python version, this also does type checking and error handling, so it's slightly longer, however, it's still comfortably under 500 lines. --- .../include/lldb/DataFormatters/TypeSummary.h | 22 +- lldb/include/lldb/lldb-enumerations.h | 1 + lldb/source/Core/Section.cpp | 1 + lldb/source/DataFormatters/CMakeLists.txt | 1 + .../DataFormatters/FormatterBytecode.cpp | 576 ++++++++++++++++++ .../DataFormatters/FormatterBytecode.def | 101 +++ .../source/DataFormatters/FormatterBytecode.h | 64 ++ lldb/source/DataFormatters/TypeSummary.cpp | 74 ++- .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp | 1 + .../ObjectFile/Mach-O/ObjectFileMachO.cpp | 4 + .../ObjectFile/PECOFF/ObjectFilePECOFF.cpp | 1 + lldb/source/Target/Target.cpp | 163 +++-- .../data-formatter/bytecode-summary/Makefile | 2 + .../bytecode-summary/TestBytecodeSummary.py | 14 + .../data-formatter/bytecode-summary/main.cpp | 36 ++ lldb/unittests/DataFormatter/CMakeLists.txt | 1 + .../DataFormatter/FormatterBytecodeTest.cpp | 36 ++ 17 files changed, 1052 insertions(+), 46 deletions(-) create mode 100644 lldb/source/DataFormatters/FormatterBytecode.cpp create mode 100644 
lldb/source/DataFormatters/FormatterBytecode.def create mode 100644 lldb/source/DataFormatters/FormatterBytecode.h create mode 100644 lldb/test/API/functionalities/data-formatter/bytecode-summary/Makefile create mode 100644 lldb/test/API/functionalities/data-formatter/bytecode-summary/TestBytecodeSummary.py create mode 100644 lldb/test/API/functionalities/data-formatter/bytecode-summary/main.cpp create mode 100644 lldb/unittests/DataFormatter/FormatterBytecodeTest.cpp diff --git a/lldb/include/lldb/DataFormatters/TypeSummary.h b/lldb/include/lldb/DataFormatters/TypeSummary.h index 382824aa2813da..0d8e46fa0b1598 100644 --- a/lldb/include/lldb/DataFormatters/TypeSummary.h +++ b/lldb/include/lldb/DataFormatters/TypeSummary.h @@ -22,6 +22,10 @@ #include "lldb/Utility/Status.h" #include "lldb/Utility/StructuredData.h" +namespace llvm { +class MemoryBuffer; +} + namespace lldb_private { class TypeSummaryOptions { public: @@ -44,7 +48,7 @@ class TypeSummaryOptions { class TypeSummaryImpl { public: - enum class Kind { eSummaryString, eScript, eCallback, eInternal }; + enum class Kind { eSummaryString, eScript, eBytecode, eCallback, eInternal }; virtual ~TypeSummaryImpl() = default; @@ -409,6 +413,22 @@ struct ScriptSummaryFormat : public TypeSummaryImpl { ScriptSummaryFormat(const ScriptSummaryFormat &) = delete; const ScriptSummaryFormat &operator=(const ScriptSummaryFormat &) = delete; }; + +/// A summary formatter that is defined in LLDB formatter bytecode.
+class BytecodeSummaryFormat : public TypeSummaryImpl { + std::unique_ptr<llvm::MemoryBuffer> m_bytecode; +public: + BytecodeSummaryFormat(const TypeSummaryImpl::Flags &flags, + std::unique_ptr<llvm::MemoryBuffer> bytecode); + bool FormatObject(ValueObject *valobj, std::string &dest, + const TypeSummaryOptions &options) override; + std::string GetDescription() override; + std::string GetName() override; + static bool classof(const TypeSummaryImpl *S) { + return S->GetKind() == Kind::eBytecode; + } +}; + } // namespace lldb_private #endif // LLDB_DATAFORMATTERS_TYPESUMMARY_H diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index 1ca4aa62218c09..b2f0943d5a9260 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -762,6 +762,7 @@ enum SectionType { eSectionTypeDWARFDebugTuIndex, eSectionTypeCTF, eSectionTypeLLDBTypeSummaries, + eSectionTypeLLDBFormatters, eSectionTypeSwiftModules, }; diff --git a/lldb/source/Core/Section.cpp b/lldb/source/Core/Section.cpp index ee01b4ce06ca1e..3b5ca2c6785ef0 100644 --- a/lldb/source/Core/Section.cpp +++ b/lldb/source/Core/Section.cpp @@ -460,6 +460,7 @@ bool Section::ContainsOnlyDebugInfo() const { case eSectionTypeDWARFGNUDebugAltLink: case eSectionTypeCTF: case eSectionTypeLLDBTypeSummaries: + case eSectionTypeLLDBFormatters: case eSectionTypeSwiftModules: return true; } diff --git a/lldb/source/DataFormatters/CMakeLists.txt b/lldb/source/DataFormatters/CMakeLists.txt index 7f48a2785c73f5..17da138227d4f1 100644 --- a/lldb/source/DataFormatters/CMakeLists.txt +++ b/lldb/source/DataFormatters/CMakeLists.txt @@ -5,6 +5,7 @@ add_lldb_library(lldbDataFormatters NO_PLUGIN_DEPENDENCIES FormatCache.cpp FormatClasses.cpp FormatManager.cpp + FormatterBytecode.cpp FormattersHelpers.cpp LanguageCategory.cpp StringPrinter.cpp diff --git a/lldb/source/DataFormatters/FormatterBytecode.cpp b/lldb/source/DataFormatters/FormatterBytecode.cpp new file mode 100644 
index 00000000000000..f1e7ba71a45200 --- /dev/null +++ b/lldb/source/DataFormatters/FormatterBytecode.cpp @@ -0,0 +1,576 @@ +//===-- FormatterBytecode.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FormatterBytecode.h" +#include "lldb/Core/ValueObject.h" +#include "lldb/Utility/LLDBLog.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadicDetails.h" +#include "llvm/Support/FormatProviders.h" +#include "llvm/ADT/StringExtras.h" + +using namespace lldb; +namespace lldb_private { + +std::string toString(FormatterBytecode::OpCodes op) { + switch (op) { +#define DEFINE_OPCODE(OP, MNEMONIC, NAME) \ + case OP: { \ + const char *s = MNEMONIC; \ + return s ? 
s : #NAME; \ + } +#include "FormatterBytecode.def" +#undef DEFINE_SIGNATURE + } + return llvm::utostr(op); +} + +std::string toString(FormatterBytecode::Selectors sel) { + switch (sel) { +#define DEFINE_SELECTOR(ID, NAME) \ + case ID: \ + return "@" #NAME; +#include "FormatterBytecode.def" +#undef DEFINE_SIGNATURE + } + return "@"+llvm::utostr(sel); +} + +std::string toString(FormatterBytecode::Signatures sig) { + switch (sig) { +#define DEFINE_SIGNATURE(ID, NAME) \ + case ID: \ + return "@" #NAME; +#include "FormatterBytecode.def" +#undef DEFINE_SIGNATURE + } + return llvm::utostr(sig); +} + +std::string toString(const FormatterBytecode::DataStack &data) { + std::string s; + llvm::raw_string_ostream os(s); + os << "[ "; + for (auto &d : data) { + if (auto s = std::get_if<std::string>(&d)) + os << '"' << *s << '"'; + else if (auto u = std::get_if<uint64_t>(&d)) + os << *u << 'u'; + else if (auto i = std::get_if<int64_t>(&d)) + os << *i; + else if (auto valobj = std::get_if<ValueObjectSP>(&d)) { + if (!valobj->get()) + os << "null"; + else + os << "object(" << valobj->get()->GetValueAsCString() << ')'; + } else if (auto type = std::get_if<CompilerType>(&d)) { + os << '(' << type->GetTypeName(true) << ')'; + } else if (auto sel = std::get_if<FormatterBytecode::Selectors>(&d)) { + os << toString(*sel); + } + os << ' '; + } + os << ']'; + return s; +} + +namespace FormatterBytecode { + +/// Implement the @format function. 
+static llvm::Error FormatImpl(DataStack &data) { + auto fmt = data.Pop<std::string>(); + auto replacements = + llvm::formatv_object_base::parseFormatString(fmt, 0, false); + std::string s; + llvm::raw_string_ostream os(s); + unsigned num_args = 0; + for (const auto &r : replacements) + if (r.Type == llvm::ReplacementType::Format) + num_args = std::max(num_args, r.Index); + + if (data.size() < num_args) + return llvm::createStringError("not enough arguments"); + + for (const auto &r : replacements) { + if (r.Type == llvm::ReplacementType::Literal) { + os << r.Spec; + continue; + } + using namespace llvm::support::detail; + auto arg = data[data.size() - num_args + r.Index]; + auto format = [&](format_adapter &&adapter) { + llvm::FmtAlign Align(adapter, r.Where, r.Width, r.Pad); + Align.format(os, r.Options); + }; + + if (auto s = std::get_if<std::string>(&arg)) + format(build_format_adapter(s)); + else if (auto u = std::get_if<uint64_t>(&arg)) + format(build_format_adapter(u)); + else if (auto i = std::get_if<int64_t>(&arg)) + format(build_format_adapter(i)); + else if (auto valobj = std::get_if<ValueObjectSP>(&arg)) { + if (!valobj->get()) + format(build_format_adapter("null object")); + else + format(build_format_adapter(valobj->get()->GetValueAsCString())); + } else if (auto type = std::get_if<CompilerType>(&arg)) + format(build_format_adapter(type->GetDisplayTypeName())); + else if (auto sel = std::get_if<FormatterBytecode::Selectors>(&arg)) + format(build_format_adapter(toString(*sel))); + } + data.Push(s); + return llvm::Error::success(); +} + +static llvm::Error TypeCheck(llvm::ArrayRef<DataStackElement> data, + DataType type) { + if (data.size() < 1) + return llvm::createStringError("not enough elements on data stack"); + + auto &elem = data.back(); + switch (type) { + case Any: + break; + case String: + if (!std::holds_alternative<std::string>(elem)) + return llvm::createStringError("expected String"); + break; + case UInt: + if 
(!std::holds_alternative<uint64_t>(elem)) + return llvm::createStringError("expected UInt"); + break; + case Int: + if (!std::holds_alternative<int64_t>(elem)) + return llvm::createStringError("expected Int"); + break; + case Object: + if (!std::holds_alternative<ValueObjectSP>(elem)) + return llvm::createStringError("expected Object"); + break; + case Type: + if (!std::holds_alternative<CompilerType>(elem)) + return llvm::createStringError("expected Type"); + break; + case Selector: + if (!std::holds_alternative<Selectors>(elem)) + return llvm::createStringError("expected Selector"); + break; + } + return llvm::Error::success(); +} + +static llvm::Error TypeCheck(llvm::ArrayRef<DataStackElement> data, + DataType type1, DataType type2) { + if (auto error = TypeCheck(data, type2)) + return error; + return TypeCheck(data.drop_back(), type1); +} + +static llvm::Error TypeCheck(llvm::ArrayRef<DataStackElement> data, + DataType type1, DataType type2, DataType type3) { + if (auto error = TypeCheck(data, type3)) + return error; + return TypeCheck(data.drop_back(1), type2, type1); +} + +llvm::Error Interpret(std::vector<ControlStackElement> &control, + DataStack &data, Selectors sel) { + if (control.empty()) + return llvm::Error::success(); + // Since the only data types are single endian and ULEBs, the + // endianness should not matter. + llvm::DataExtractor cur_block(control.back(), true, 64); + llvm::DataExtractor::Cursor pc(0); + + while (!control.empty()) { + /// Activate the top most block from the control stack. + auto activate_block = [&]() { + // Save the return address. + if (control.size() > 1) + control[control.size() - 2] = cur_block.getData().drop_front(pc.tell()); + cur_block = llvm::DataExtractor(control.back(), true, 64); + if (pc) + pc = llvm::DataExtractor::Cursor(0); + }; + + /// Fetch the next byte in the instruction stream. + auto next_byte = [&]() -> uint8_t { + // At the end of the current block? 
+ while (pc.tell() >= cur_block.size() && !control.empty()) { + if (control.size() == 1) { + control.pop_back(); + return 0; + } + control.pop_back(); + activate_block(); + } + + // Fetch the next instruction. + return cur_block.getU8(pc); + }; + + // Fetch the next opcode. + OpCodes opcode = (OpCodes)next_byte(); + if (control.empty() || !pc) + return pc.takeError(); + + LLDB_LOGV(GetLog(LLDBLog::DataFormatters), + "[eval {0}] opcode={1}, control={2}, data={3}", toString(sel), + toString(opcode), control.size(), toString(data)); + + + // Various shorthands to improve the readability of error handling. +#define TYPE_CHECK(...) \ + if (auto error = TypeCheck(data, __VA_ARGS__)) \ + return error; + + auto error = [&](llvm::Twine msg) { + return llvm::createStringError(msg + "(opcode=" + toString(opcode) + ")"); + }; + + switch (opcode) { + // Data stack manipulation. + case op_dup: + TYPE_CHECK(Any); + data.Push(data.back()); + break; + case op_drop: + TYPE_CHECK(Any); + data.pop_back(); + break; + case op_pick: { + TYPE_CHECK(UInt); + uint64_t idx = data.Pop<uint64_t>(); + if (idx >= data.size()) + return error("index out of bounds"); + data.Push(data[idx]); + break; + } + case op_over: + TYPE_CHECK(Any, Any); + data.Push(data[data.size() - 2]); + break; + case op_swap: { + TYPE_CHECK(Any, Any); + auto x = data.PopAny(); + auto y = data.PopAny(); + data.Push(x); + data.Push(y); + break; + } + case op_rot: { + TYPE_CHECK(Any, Any, Any); + auto z = data.PopAny(); + auto y = data.PopAny(); + auto x = data.PopAny(); + data.Push(z); + data.Push(x); + data.Push(y); + break; + } + // Control stack manipulation. 
+ case op_begin: { + uint64_t length = cur_block.getULEB128(pc); + if (!pc) + return pc.takeError(); + llvm::StringRef block = cur_block.getBytes(pc, length); + if (!pc) + return pc.takeError(); + control.push_back(block); + break; + } + case op_if: + TYPE_CHECK(UInt); + if (data.Pop<uint64_t>() != 0) { + if (!cur_block.size()) + return error("empty control stack"); + activate_block(); + } + break; + case op_ifelse: + TYPE_CHECK(UInt); + if (cur_block.size() < 2) + return error("empty control stack"); + if (data.Pop<uint64_t>() == 0) + control[control.size()-2] = control.back(); + control.pop_back(); + activate_block(); + break; + // Literals. + case op_lit_uint: + data.Push(cur_block.getULEB128(pc)); + break; + case op_lit_int: + data.Push(cur_block.getSLEB128(pc)); + break; + case op_lit_selector: + data.Push(Selectors(cur_block.getU8(pc))); + break; + case op_lit_string: { + uint64_t length = cur_block.getULEB128(pc); + llvm::StringRef bytes = cur_block.getBytes(pc, length); + data.Push(bytes.str()); + break; + } + case op_as_uint: { + TYPE_CHECK(Int); + uint64_t casted; + int64_t val = data.Pop<int64_t>(); + memcpy(&casted, &val, sizeof(val)); + data.Push(casted); + break; + } + case op_as_int: { + TYPE_CHECK(UInt); + int64_t casted; + uint64_t val = data.Pop<uint64_t>(); + memcpy(&casted, &val, sizeof(val)); + data.Push(casted); + break; + } + case op_is_null: { + TYPE_CHECK(Object); + data.Push(data.Pop<ValueObjectSP>() ? 0ULL : 1ULL); + break; + } + // Arithmetic, logic, etc. 
+#define BINOP_IMPL(OP, CHECK_ZERO) \ + { \ + TYPE_CHECK(Any, Any); \ + auto y = data.PopAny(); \ + if (std::holds_alternative<uint64_t>(y)) { \ + if (CHECK_ZERO && !std::get<uint64_t>(y)) \ + return error(#OP " by zero"); \ + TYPE_CHECK(UInt); \ + data.Push((uint64_t)(data.Pop<uint64_t>() OP std::get<uint64_t>(y))); \ + } else if (std::holds_alternative<int64_t>(y)) { \ + if (CHECK_ZERO && !std::get<int64_t>(y)) \ + return error(#OP " by zero"); \ + TYPE_CHECK(Int); \ + data.Push((int64_t)(data.Pop<int64_t>() OP std::get<int64_t>(y))); \ + } else \ + return error("unsupported data types"); \ + } +#define BINOP(OP) BINOP_IMPL(OP, false) +#define BINOP_CHECKZERO(OP) BINOP_IMPL(OP, true) + case op_plus: + BINOP(+); + break; + case op_minus: + BINOP(-); + break; + case op_mul: + BINOP(*); + break; + case op_div: + BINOP_CHECKZERO(/); + break; + case op_mod: + BINOP_CHECKZERO(%); + break; + case op_shl: +#define SHIFTOP(OP) \ + { \ + TYPE_CHECK(Any, Any); \ + if (std::holds_alternative<uint64_t>(data.back())) { \ + uint64_t y = data.Pop<uint64_t>(); \ + TYPE_CHECK(UInt); \ + uint64_t x = data.Pop<uint64_t>(); \ + if (y > 64) \ + return error("shift out of bounds"); \ + data.Push(x OP y); \ + } else if (std::holds_alternative<int64_t>(data.back())) { \ + uint64_t y = data.Pop<int64_t>(); \ + TYPE_CHECK(Int); \ + uint64_t x = data.Pop<int64_t>(); \ + if (y > 64) \ + return error("shift out of bounds"); \ + if (y < 0) \ + return error("shift out of bounds"); \ + data.Push(x OP y); \ + } else \ + return error("unsupported data types"); \ + } + SHIFTOP(<<); + break; + case op_shr: + SHIFTOP(<<); + break; + case op_and: + BINOP(&); + break; + case op_or: + BINOP(|); + break; + case op_xor: + BINOP(^); + break; + case op_not: + TYPE_CHECK(UInt); + data.Push(~data.Pop<uint64_t>()); + break; + case op_eq: + BINOP(==); + break; + case op_neq: + BINOP(!=); + break; + case op_lt: + BINOP(<); + break; + case op_gt: + BINOP(>); + break; + case op_le: + BINOP(<=); + break; + case 
op_ge: + BINOP(>=); + break; + case op_call: { + TYPE_CHECK(Selector); + Selectors sel = data.Pop<Selectors>(); + + // Shorthand to improve readability. +#define POP_VALOBJ(VALOBJ) \ + auto VALOBJ = data.Pop<ValueObjectSP>(); \ + if (!VALOBJ) \ + return error("null object"); + + auto sel_error = [&](const char *msg) { + return llvm::createStringError("{0} (opcode={1}, selector={2})", msg, + toString(opcode).c_str(), + toString(sel).c_str()); + }; + + switch (sel) { + case sel_summary: { + TYPE_CHECK(Object); + POP_VALOBJ(valobj); + const char *summary = valobj->GetSummaryAsCString(); + data.Push(summary ? std::string(valobj->GetSummaryAsCString()) + : std::string()); + break; + } + case sel_get_num_children: { + TYPE_CHECK(Object); + POP_VALOBJ(valobj); + auto result = valobj->GetNumChildren(); + if (!result) + return result.takeError(); + data.Push((uint64_t)*result); + break; + } + case sel_get_child_at_index: { + TYPE_CHECK(Object, UInt); + auto index = data.Pop<uint64_t>(); + POP_VALOBJ(valobj); + data.Push(valobj->GetChildAtIndex(index)); + break; + } + case sel_get_child_with_name: { + TYPE_CHECK(Object, String); + auto name = data.Pop<std::string>(); + POP_VALOBJ(valobj); + data.Push(valobj->GetChildMemberWithName(name)); + break; + } + case sel_get_child_index: { + TYPE_CHECK(Object, String); + auto name = data.Pop<std::string>(); + POP_VALOBJ(valobj); + data.Push(valobj->GetIndexOfChildWithName(name)); + break; + } + case sel_get_type: { + TYPE_CHECK(Object); + POP_VALOBJ(valobj); + // FIXME: do we need to control dynamic type resolution? + data.Push(valobj->GetTypeImpl().GetCompilerType(false)); + break; + } + case sel_get_template_argument_type: { + TYPE_CHECK(Type, UInt); + auto index = data.Pop<uint64_t>(); + auto type = data.Pop<CompilerType>(); + // FIXME: There is more code in SBType::GetTemplateArgumentType(). 
+ data.Push(type.GetTypeTemplateArgument(index, true)); + break; + } + case sel_get_value: { + TYPE_CHECK(Object); + POP_VALOBJ(valobj); + data.Push(std::string(valobj->GetValueAsCString())); + break; + } + case sel_get_value_as_unsigned: { + TYPE_CHECK(Object); + POP_VALOBJ(valobj); + bool success; + uint64_t val = valobj->GetValueAsUnsigned(0, &success); + data.Push(val); + if (!success) + return sel_error("failed to get value"); + break; + } + case sel_get_value_as_signed: { + TYPE_CHECK(Object); + POP_VALOBJ(valobj); + bool success; + int64_t val = valobj->GetValueAsSigned(0, &success); + data.Push(val); + if (!success) + return sel_error("failed to get value"); + break; + } + case sel_get_value_as_address: { + TYPE_CHECK(Object); + POP_VALOBJ(valobj); + bool success; + uint64_t addr = valobj->GetValueAsUnsigned(0, &success); + if (!success) + return sel_error("failed to get value"); + if (auto process_sp = valobj->GetProcessSP()) + addr = process_sp->FixDataAddress(addr); + data.Push(addr); + break; + } + case sel_cast: { + TYPE_CHECK(Object, Type); + auto type = data.Pop<CompilerType>(); + POP_VALOBJ(valobj); + data.Push(valobj->Cast(type)); + break; + } + case sel_strlen: { + TYPE_CHECK(String); + data.Push(data.Pop<std::string>().size()); + break; + } + case sel_fmt: { + TYPE_CHECK(String); + if (auto error = FormatImpl(data)) + return error; + break; + } + default: + return sel_error("selector not implemented"); + } + break; + } + default: + return error("opcode not implemented"); + } + } + return pc.takeError(); +} +} // namespace FormatterBytecode + +} // namespace lldb_private diff --git a/lldb/source/DataFormatters/FormatterBytecode.def b/lldb/source/DataFormatters/FormatterBytecode.def new file mode 100644 index 00000000000000..c6645631fa0065 --- /dev/null +++ b/lldb/source/DataFormatters/FormatterBytecode.def @@ -0,0 +1,101 @@ +//===-- FormatterBytecode.def -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the 
Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef DEFINE_OPCODE +#define DEFINE_OPCODE(OP, MNEMONIC, NAME) +#endif +#ifndef DEFINE_SELECTOR +#define DEFINE_SELECTOR(ID, NAME) +#endif +#ifndef DEFINE_SIGNATURE +#define DEFINE_SIGNATURE(ID, NAME) +#endif + +// Opcodes. +DEFINE_OPCODE(0x01, "dup", dup) +DEFINE_OPCODE(0x02, "drop", drop) +DEFINE_OPCODE(0x03, "pick", pick) +DEFINE_OPCODE(0x04, "over", over) +DEFINE_OPCODE(0x05, "swap", swap) +DEFINE_OPCODE(0x06, "rot", rot) + +DEFINE_OPCODE(0x10, "{", begin) +DEFINE_OPCODE(0x11, "if", if) +DEFINE_OPCODE(0x12, "ifelse", ifelse) + +DEFINE_OPCODE(0x20, nullptr, lit_uint) +DEFINE_OPCODE(0x21, nullptr, lit_int) +DEFINE_OPCODE(0x22, nullptr, lit_string) +DEFINE_OPCODE(0x23, nullptr, lit_selector) + +DEFINE_OPCODE(0x2a, "as_int", as_int) +DEFINE_OPCODE(0x2b, "as_uint", as_uint) +DEFINE_OPCODE(0x2c, "is_null", is_null) + +DEFINE_OPCODE(0x30, "+", plus) +DEFINE_OPCODE(0x31, "-", minus) +DEFINE_OPCODE(0x32, "*", mul) +DEFINE_OPCODE(0x33, "/", div) +DEFINE_OPCODE(0x34, "%", mod) +DEFINE_OPCODE(0x35, "<<", shl) +DEFINE_OPCODE(0x36, ">>", shr) + +DEFINE_OPCODE(0x40, "&", and) +DEFINE_OPCODE(0x41, "|", or) +DEFINE_OPCODE(0x42, "^", xor) +DEFINE_OPCODE(0x43, "~", not) + +DEFINE_OPCODE(0x50, "=", eq) +DEFINE_OPCODE(0x51, "!=", neq) +DEFINE_OPCODE(0x52, "<", lt) +DEFINE_OPCODE(0x53, ">", gt) +DEFINE_OPCODE(0x54, "=<", le) +DEFINE_OPCODE(0x55, ">=", ge) + +DEFINE_OPCODE(0x60, "call", call) + +// Selectors. 
+DEFINE_SELECTOR(0x00, summary) +DEFINE_SELECTOR(0x01, type_summary) + +DEFINE_SELECTOR(0x10, get_num_children) +DEFINE_SELECTOR(0x11, get_child_at_index) +DEFINE_SELECTOR(0x12, get_child_with_name) +DEFINE_SELECTOR(0x13, get_child_index) +DEFINE_SELECTOR(0x15, get_type) +DEFINE_SELECTOR(0x16, get_template_argument_type) +DEFINE_SELECTOR(0x17, cast) + +DEFINE_SELECTOR(0x20, get_value) +DEFINE_SELECTOR(0x21, get_value_as_unsigned) +DEFINE_SELECTOR(0x22, get_value_as_signed) +DEFINE_SELECTOR(0x23, get_value_as_address) + +DEFINE_SELECTOR(0x40, read_memory_byte) +DEFINE_SELECTOR(0x41, read_memory_uint32) +DEFINE_SELECTOR(0x42, read_memory_int32) +DEFINE_SELECTOR(0x43, read_memory_unsigned) +DEFINE_SELECTOR(0x44, read_memory_signed) +DEFINE_SELECTOR(0x45, read_memory_address) +DEFINE_SELECTOR(0x46, read_memory) + +DEFINE_SELECTOR(0x50, fmt) +DEFINE_SELECTOR(0x51, sprintf) +DEFINE_SELECTOR(0x52, strlen) + +// Formatter signatures. +DEFINE_SIGNATURE(0, summary) +DEFINE_SIGNATURE(1, init) +DEFINE_SIGNATURE(2, get_num_children) +DEFINE_SIGNATURE(3, get_child_index) +DEFINE_SIGNATURE(4, get_child_at_index) +DEFINE_SIGNATURE(5, get_value) + +#undef DEFINE_OPCODE +#undef DEFINE_SELECTOR +#undef DEFINE_SIGNATURE diff --git a/lldb/source/DataFormatters/FormatterBytecode.h b/lldb/source/DataFormatters/FormatterBytecode.h new file mode 100644 index 00000000000000..21454d9c7e231f --- /dev/null +++ b/lldb/source/DataFormatters/FormatterBytecode.h @@ -0,0 +1,64 @@ +//===-- FormatterBytecode.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/DataFormatters/TypeSummary.h" +#include "lldb/Symbol/CompilerType.h" + +namespace lldb_private { + +namespace FormatterBytecode { + +enum DataType : uint8_t { Any, String, Int, UInt, Object, Type, Selector }; + +enum OpCodes : uint8_t { +#define DEFINE_OPCODE(OP, MNEMONIC, NAME) op_##NAME = OP, +#include "FormatterBytecode.def" +#undef DEFINE_OPCODE +}; + +enum Selectors : uint8_t { +#define DEFINE_SELECTOR(ID, NAME) sel_##NAME = ID, +#include "FormatterBytecode.def" +#undef DEFINE_SELECTOR +}; + +enum Signatures : uint8_t { +#define DEFINE_SIGNATURE(ID, NAME) sig_##NAME = ID, +#include "FormatterBytecode.def" +#undef DEFINE_SIGNATURE +}; + +using ControlStackElement = llvm::StringRef; +using DataStackElement = + std::variant<std::string, uint64_t, int64_t, lldb::ValueObjectSP, + CompilerType, Selectors>; +struct DataStack : public std::vector<DataStackElement> { + DataStack() = default; + DataStack(lldb::ValueObjectSP initial_value) + : std::vector<DataStackElement>({initial_value}) {} + void Push(DataStackElement el) { push_back(el); } + template <typename T> T Pop() { + T el = std::get<T>(back()); + pop_back(); + return el; + } + DataStackElement PopAny() { + DataStackElement el = back(); + pop_back(); + return el; + } +}; +llvm::Error Interpret(std::vector<ControlStackElement> &control, + DataStack &data, Selectors sel); +} // namespace FormatterBytecode + +std::string toString(FormatterBytecode::OpCodes op); +std::string toString(FormatterBytecode::Selectors sel); +std::string toString(FormatterBytecode::Signatures sig); + +} // namespace lldb_private diff --git a/lldb/source/DataFormatters/TypeSummary.cpp b/lldb/source/DataFormatters/TypeSummary.cpp index 339068e8cc6aa6..482834a755b54d 100644 --- a/lldb/source/DataFormatters/TypeSummary.cpp +++ b/lldb/source/DataFormatters/TypeSummary.cpp @@ -8,9 
+8,7 @@ #include "lldb/DataFormatters/TypeSummary.h" - - - +#include "FormatterBytecode.h" #include "lldb/lldb-enumerations.h" #include "lldb/lldb-public.h" @@ -58,6 +56,8 @@ std::string TypeSummaryImpl::GetSummaryKindName() { return "python"; case Kind::eInternal: return "c++"; + case Kind::eBytecode: + return "bytecode"; } } @@ -230,3 +230,71 @@ std::string ScriptSummaryFormat::GetDescription() { } std::string ScriptSummaryFormat::GetName() { return m_script_formatter_name; } + +BytecodeSummaryFormat::BytecodeSummaryFormat( + const TypeSummaryImpl::Flags &flags, + std::unique_ptr<llvm::MemoryBuffer> bytecode) + : TypeSummaryImpl(Kind::eBytecode, flags), m_bytecode(std::move(bytecode)) { +} + +bool BytecodeSummaryFormat::FormatObject(ValueObject *valobj, std::string &retval, + const TypeSummaryOptions &options) { + if (!valobj) + return false; + + TargetSP target_sp(valobj->GetTargetSP()); + + if (!target_sp) { + retval.assign("error: no target"); + return false; + } + + std::vector<FormatterBytecode::ControlStackElement> control( + {m_bytecode->getBuffer()}); + FormatterBytecode::DataStack data({valobj->GetSP()}); + llvm::Error error = FormatterBytecode::Interpret( + control, data, FormatterBytecode::sel_summary); + if (error) { + retval = llvm::toString(std::move(error)); + return false; + } + if (!data.size()) { + retval = "empty stack"; + return false; + } + auto &top = data.back(); + retval = ""; + llvm::raw_string_ostream os(retval); + if (auto s = std::get_if<std::string>(&top)) + os << *s; + else if (auto u = std::get_if<uint64_t>(&top)) + os << *u; + else if (auto i = std::get_if<int64_t>(&top)) + os << *i; + else if (auto valobj = std::get_if<ValueObjectSP>(&top)) { + if (!valobj->get()) + os << "empty object"; + else + os << valobj->get()->GetValueAsCString(); + } else if (auto type = std::get_if<CompilerType>(&top)) { + os<<type->TypeDescription(); + } else if (auto sel = std::get_if<FormatterBytecode::Selectors>(&top)) { + os<< toString(*sel); + } + 
return true; +} + +std::string BytecodeSummaryFormat::GetDescription() { + StreamString sstr; + sstr.Printf("%s%s%s%s%s%s%s\n ", Cascades() ? "" : " (not cascading)", + !DoesPrintChildren(nullptr) ? "" : " (show children)", + !DoesPrintValue(nullptr) ? " (hide value)" : "", + IsOneLiner() ? " (one-line printout)" : "", + SkipsPointers() ? " (skip pointers)" : "", + SkipsReferences() ? " (skip references)" : "", + HideNames(nullptr) ? " (hide member names)" : ""); + // FIXME: sstr.PutCString(disassembly); + return std::string(sstr.GetString()); +} + +std::string BytecodeSummaryFormat::GetName() { return "LLDB bytecode formatter"; } diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index ad4a84ef02bf72..0a03bde7967444 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -1679,6 +1679,7 @@ static SectionType GetSectionTypeFromName(llvm::StringRef Name) { .Case(".gosymtab", eSectionTypeGoSymtab) .Case(".text", eSectionTypeCode) .Case(".lldbsummaries", lldb::eSectionTypeLLDBTypeSummaries) + .Case(".lldbformatters", lldb::eSectionTypeLLDBFormatters) .Case(".swift_ast", eSectionTypeSwiftModules) .Default(eSectionTypeOther); } diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index d6bec5d84aea19..ab26b7fa6648dc 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -1210,6 +1210,7 @@ AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) { case eSectionTypeDWARFGNUDebugAltLink: case eSectionTypeCTF: case eSectionTypeLLDBTypeSummaries: + case eSectionTypeLLDBFormatters: case eSectionTypeSwiftModules: return AddressClass::eDebug; @@ -1486,6 +1487,7 @@ static lldb::SectionType GetSectionType(uint32_t flags, static ConstString 
g_sect_name_go_symtab("__gosymtab"); static ConstString g_sect_name_ctf("__ctf"); static ConstString g_sect_name_lldb_summaries("__lldbsummaries"); + static ConstString g_sect_name_lldb_formatters("__lldbformatters"); static ConstString g_sect_name_swift_ast("__swift_ast"); if (section_name == g_sect_name_dwarf_debug_abbrev) @@ -1568,6 +1570,8 @@ static lldb::SectionType GetSectionType(uint32_t flags, return eSectionTypeCTF; if (section_name == g_sect_name_lldb_summaries) return lldb::eSectionTypeLLDBTypeSummaries; + if (section_name == g_sect_name_lldb_formatters) + return lldb::eSectionTypeLLDBFormatters; if (section_name == g_sect_name_swift_ast) return eSectionTypeSwiftModules; if (section_name == g_sect_name_objc_data || diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp index bb712da7f6d67d..bfdb8140e40aff 100644 --- a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp +++ b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp @@ -1011,6 +1011,7 @@ SectionType ObjectFilePECOFF::GetSectionType(llvm::StringRef sect_name, .Cases(".eh_frame", ".eh_fram", eSectionTypeEHFrame) .Case(".gosymtab", eSectionTypeGoSymtab) .Case(".lldbsummaries", lldb::eSectionTypeLLDBTypeSummaries) + .Case(".lldbformatters", lldb::eSectionTypeLLDBFormatters) .Case("swiftast", eSectionTypeSwiftModules) .Default(eSectionTypeInvalid); if (section_type != eSectionTypeInvalid) diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 37670b108d9631..679f5cac24809c 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -1538,21 +1538,17 @@ static void LoadScriptingResourceForModule(const ModuleSP &module_sp, feedback_stream.GetData()); } -// Load type summaries embedded in the binary. These are type summaries provided -// by the authors of the code. 
-static void LoadTypeSummariesForModule(ModuleSP module_sp) { - auto *sections = module_sp->GetSectionList(); +static void ForEachFormatterInModule( + Module &module, SectionType section_type, + std::function<void(llvm::DataExtractor, llvm::StringRef)> fn) { + auto *sections = module.GetSectionList(); if (!sections) return; - auto summaries_sp = - sections->FindSectionByType(eSectionTypeLLDBTypeSummaries, true); - if (!summaries_sp) + auto section_sp = sections->FindSectionByType(section_type, true); + if (!section_sp) return; - Log *log = GetLog(LLDBLog::DataFormatters); - const char *module_name = module_sp->GetObjectName().GetCString(); - TypeCategoryImplSP category; DataVisualization::Categories::GetCategory(ConstString("default"), category); @@ -1563,49 +1559,131 @@ static void LoadTypeSummariesForModule(ModuleSP module_sp) { // * The remaining size of the record // * The size of the type identifier // * The type identifier, either a type name, or a regex - // * The size of the summary string - // * The summary string + // * The size of the entry + // * The entry // // Integers are encoded using ULEB. // // Strings are encoded with first a length (ULEB), then the string contents, // and lastly a null terminator. The length includes the null. 
- DataExtractor extractor; - auto section_size = summaries_sp->GetSectionData(extractor); - lldb::offset_t offset = 0; - while (offset < section_size) { - uint64_t version = extractor.GetULEB128(&offset); - uint64_t record_size = extractor.GetULEB128(&offset); + DataExtractor lldb_extractor; + auto section_size = section_sp->GetSectionData(lldb_extractor); + llvm::DataExtractor section = lldb_extractor.GetAsLLVM(); + bool le = section.isLittleEndian(); + uint8_t addr_size = section.getAddressSize(); + llvm::DataExtractor::Cursor cursor(0); + while (cursor && cursor.tell() < section_size) { + uint64_t version = section.getULEB128(cursor); + uint64_t record_size = section.getULEB128(cursor); if (version == 1) { - uint64_t type_size = extractor.GetULEB128(&offset); - llvm::StringRef type_name = extractor.GetCStr(&offset, type_size); - uint64_t summary_size = extractor.GetULEB128(&offset); - llvm::StringRef summary_string = extractor.GetCStr(&offset, summary_size); - if (!type_name.empty() && !summary_string.empty()) { - TypeSummaryImpl::Flags flags; - auto summary_sp = - std::make_shared<StringSummaryFormat>(flags, summary_string.data()); - FormatterMatchType match_type = eFormatterMatchExact; - if (summary_string.front() == '^' && summary_string.back() == '$') - match_type = eFormatterMatchRegex; - category->AddTypeSummary(type_name, match_type, summary_sp); - LLDB_LOGF(log, "Loaded embedded type summary for '%s' from %s.", - type_name.data(), module_name); - } else { - if (type_name.empty()) - LLDB_LOGF(log, "Missing string(s) in embedded type summary in %s.", - module_name); - } + llvm::DataExtractor record(section.getData().drop_front(cursor.tell()), + le, addr_size); + llvm::DataExtractor::Cursor cursor(0); + uint64_t type_size = record.getULEB128(cursor); + llvm::StringRef type_name = record.getBytes(cursor, type_size); + llvm::Error error = cursor.takeError(); + if (!error) + fn(llvm::DataExtractor(record.getData().drop_front(cursor.tell()), le, + addr_size), 
type_name); + else + LLDB_LOG_ERROR(GetLog(LLDBLog::DataFormatters), std::move(error), + "{0}"); } else { // Skip unsupported record. - offset += record_size; - LLDB_LOGF( - log, - "Skipping unsupported embedded type summary of version %llu in %s.", - version, module_name); + LLDB_LOG( + GetLog(LLDBLog::DataFormatters), + "Skipping unsupported embedded type summary of version {0} in {1}.", + version, module.GetFileSpec()); } + section.skip(cursor, record_size); } + if (!cursor) + LLDB_LOG_ERROR(GetLog(LLDBLog::DataFormatters), cursor.takeError(), "{0}"); +} + +/// Load type summaries embedded in the binary. These are type summaries provided +/// by the authors of the code. +static void LoadTypeSummariesForModule(ModuleSP module_sp) { + ForEachFormatterInModule( + *module_sp, eSectionTypeLLDBTypeSummaries, + [&](llvm::DataExtractor extractor, llvm::StringRef type_name) { + TypeCategoryImplSP category; + DataVisualization::Categories::GetCategory(ConstString("default"), + category); + // The type summary record is serialized as follows. + // + // * The size of the summary string + // * The summary string + // + // Integers are encoded using ULEB. 
+ llvm::DataExtractor::Cursor cursor(0); + uint64_t summary_size = extractor.getULEB128(cursor); + llvm::StringRef summary_string = + extractor.getBytes(cursor, summary_size); + if (!cursor) { + LLDB_LOG_ERROR(GetLog(LLDBLog::DataFormatters), cursor.takeError(), + "{0}"); + return; + } + if (type_name.empty() || summary_string.empty()) { + LLDB_LOG(GetLog(LLDBLog::DataFormatters), + "Missing string(s) in embedded type summary in {0}.", + module_sp->GetFileSpec()); + return; + } + TypeSummaryImpl::Flags flags; + auto summary_sp = std::make_shared<StringSummaryFormat>( + flags, summary_string.str().c_str()); + FormatterMatchType match_type = eFormatterMatchExact; + if (type_name.front() == '^') + match_type = eFormatterMatchRegex; + category->AddTypeSummary(type_name, match_type, summary_sp); + LLDB_LOG(GetLog(LLDBLog::DataFormatters), + "Loaded embedded type summary for '{0}' from {1}.", type_name, + module_sp->GetFileSpec()); + }); +} + +/// Load data formatters embedded in the binary. These are formatters provided +/// by the authors of the code. 
+static void LoadFormattersForModule(ModuleSP module_sp) { + ForEachFormatterInModule( + *module_sp, eSectionTypeLLDBFormatters, + [&](llvm::DataExtractor extractor, llvm::StringRef type_name) { + // * Function signature (1 byte) + // * Length of the program (ULEB128) + // * The program bytecode + TypeCategoryImplSP category; + DataVisualization::Categories::GetCategory(ConstString("default"), + category); + llvm::DataExtractor::Cursor cursor(0); + while (cursor && cursor.tell() < extractor.size()) { + uint8_t signature = extractor.getU8(cursor); + uint64_t size = extractor.getULEB128(cursor); + llvm::StringRef bytecode = extractor.getBytes(cursor, size); + if (!cursor) { + LLDB_LOG_ERROR(GetLog(LLDBLog::DataFormatters), cursor.takeError(), + "{0}"); + return; + } + if (signature == 0) { + TypeSummaryImpl::Flags flags; + auto summary_sp = std::make_shared<BytecodeSummaryFormat>( + flags, llvm::MemoryBuffer::getMemBufferCopy(bytecode)); + FormatterMatchType match_type = eFormatterMatchExact; + if (type_name.front() == '^') + match_type = eFormatterMatchRegex; + category->AddTypeSummary(type_name, match_type, summary_sp); + LLDB_LOG(GetLog(LLDBLog::DataFormatters), + "Loaded embedded type summary for '{0}' from {1}.", + type_name, module_sp->GetFileSpec()); + } else + LLDB_LOG(GetLog(LLDBLog::DataFormatters), + "Unsupported formatter signature {0} for '{1}' in {2}", + signature, type_name, module_sp->GetFileSpec()); + } + }); } void Target::ClearModules(bool delete_locations) { @@ -1847,6 +1925,7 @@ void Target::ModulesDidLoad(ModuleList &module_list) { ModuleSP module_sp(module_list.GetModuleAtIndex(idx)); LoadScriptingResourceForModule(module_sp, this); LoadTypeSummariesForModule(module_sp); + LoadFormattersForModule(module_sp); } m_breakpoint_list.UpdateBreakpoints(module_list, true, false); m_internal_breakpoint_list.UpdateBreakpoints(module_list, true, false); diff --git a/lldb/test/API/functionalities/data-formatter/bytecode-summary/Makefile 
b/lldb/test/API/functionalities/data-formatter/bytecode-summary/Makefile new file mode 100644 index 00000000000000..c9319d6e6888a4 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/bytecode-summary/Makefile @@ -0,0 +1,2 @@ +CXX_SOURCES := main.cpp +include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/bytecode-summary/TestBytecodeSummary.py b/lldb/test/API/functionalities/data-formatter/bytecode-summary/TestBytecodeSummary.py new file mode 100644 index 00000000000000..7c47ddcdfc13c9 --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/bytecode-summary/TestBytecodeSummary.py @@ -0,0 +1,14 @@ +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestCase(TestBase): + @skipUnlessDarwin + def test(self): + self.build() + self.expect('log enable -v lldb formatters') + lldbutil.run_to_source_breakpoint(self, "break here", lldb.SBFileSpec("main.cpp")) + self.expect("v x", substrs=['(MyOptional<int>) x = None']) + self.expect("v y", substrs=['(MyOptional<int>) y = 42']) diff --git a/lldb/test/API/functionalities/data-formatter/bytecode-summary/main.cpp b/lldb/test/API/functionalities/data-formatter/bytecode-summary/main.cpp new file mode 100644 index 00000000000000..d77d17acb0d0ec --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/bytecode-summary/main.cpp @@ -0,0 +1,36 @@ +// A bare-bones llvm::Optional reimplementation. 
+ +template <typename T> struct MyOptionalStorage { + MyOptionalStorage(T val) : value(val), hasVal(true) {} + MyOptionalStorage() {} + T value; + bool hasVal = false; +}; + +template <typename T> struct MyOptional { + MyOptionalStorage<T> Storage; + MyOptional(T val) : Storage(val) {} + MyOptional() {} + T &operator*() { return Storage.value; } +}; + +void stop() {} + +int main(int argc, char **argv) { + MyOptional<int> x, y = 42; + stop(); // break here + return *y; +} + +__attribute__((used, section("__DATA_CONST,__lldbformatters"))) unsigned char + _MyOptional_type_summary[] = + "\x01" // version + "\xa3" // record size + "\x01" // record size + "\x10" // type name size + "^MyOptional<.+>$" // type name + "\x00" // sig_summary + "\x8d" // program size + "\x01" // program size + "\x1\x22\x7Storage#\x12\x60\x1,C\x10\x1\x5\x11\x2\x1\x22\x6hasVal#\x12\x60\x1,\x10\x1e\x2\x22\x1b<could not read MyOptional>\x10G#!\x60 \x0P\x10\x6\x22\x4None\x10\x36\x1#\x15\x60 \x0#\x16\x60\x5\x22\x5value#\x12\x60\x5#\x17\x60\x1,\x10\x6\x22\x4None\x10\x11\x1#\x0\x60\x1#R\x60\x10\x3# \x60\x10\x1\x2\x12\x12\x12\x12" + ; // summary function diff --git a/lldb/unittests/DataFormatter/CMakeLists.txt b/lldb/unittests/DataFormatter/CMakeLists.txt index 9d967a72bfd1fa..b858db1c716347 100644 --- a/lldb/unittests/DataFormatter/CMakeLists.txt +++ b/lldb/unittests/DataFormatter/CMakeLists.txt @@ -1,6 +1,7 @@ add_lldb_unittest(LLDBFormatterTests FormatManagerTests.cpp FormattersContainerTest.cpp + FormatterBytecodeTest.cpp StringPrinterTests.cpp LINK_LIBS diff --git a/lldb/unittests/DataFormatter/FormatterBytecodeTest.cpp b/lldb/unittests/DataFormatter/FormatterBytecodeTest.cpp new file mode 100644 index 00000000000000..621eed0471a0e5 --- /dev/null +++ b/lldb/unittests/DataFormatter/FormatterBytecodeTest.cpp @@ -0,0 +1,36 @@ +#include "DataFormatters/FormatterBytecode.h" +#include "lldb/Utility/StreamString.h" + +#include "gtest/gtest.h" + +using namespace lldb_private; +using namespace lldb; +using 
namespace FormatterBytecode; +using llvm::StringRef; + +namespace { +class FormatterBytecodeTest : public ::testing::Test {}; + +bool Interpret(std::vector<uint8_t> code, DataStack &data) { + auto buf = StringRef(reinterpret_cast<const char *>(code.data()), code.size()); + std::vector<ControlStackElement> control({buf}); + if (auto error = Interpret(control, data, sel_summary)) { + llvm::errs() << llvm::toString(std::move(error)) <<"\n"; + return false; + } + return true; +} + +} // namespace + +TEST_F(FormatterBytecodeTest, Basic) { + { + DataStack data; + ASSERT_TRUE(Interpret({op_lit_uint, 23, op_dup, op_plus}, data)); + ASSERT_EQ(data.Pop<uint64_t>(), (uint64_t)46); + } + { + DataStack data; + ASSERT_FALSE(Interpret({op_lit_uint, 23, op_lit_uint, 0, op_div}, data)); + } +} _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits