https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/107279
>From e45d7e68a371a09ea766c4accf8edc6c030fd7fd Mon Sep 17 00:00:00 2001 From: Stephen Tozer <stephen.to...@sony.com> Date: Wed, 4 Sep 2024 12:09:50 +0100 Subject: [PATCH 1/3] Add CMake option to enable expensive line number origin tracking --- llvm/CMakeLists.txt | 4 ++++ llvm/cmake/modules/HandleLLVMOptions.cmake | 12 ++++++++++++ llvm/docs/CMake.rst | 11 +++++++++++ llvm/include/llvm/Config/config.h.cmake | 4 ++++ 4 files changed, 31 insertions(+) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 12618966c4adfd..3e2e90f5adad2e 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -524,6 +524,10 @@ endif() option(LLVM_ENABLE_CRASH_DUMPS "Turn on memory dumps on crashes. Currently only implemented on Windows." OFF) +set(LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING "DISABLED" CACHE STRING + "Enhance debugify's line number coverage tracking; enabling this is abi-breaking. Can be DISABLED, COVERAGE, or COVERAGE_AND_ORIGIN.") +set_property(CACHE LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING PROPERTY STRINGS DISABLED COVERAGE COVERAGE_AND_ORIGIN) + set(WINDOWS_PREFER_FORWARD_SLASH_DEFAULT OFF) if (MINGW) # Cygwin doesn't identify itself as Windows, and thus gets path::Style::posix diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 5ca580fbb59c59..a4b11c149da9de 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -196,6 +196,18 @@ else() message(FATAL_ERROR "Unknown value for LLVM_ABI_BREAKING_CHECKS: \"${LLVM_ABI_BREAKING_CHECKS}\"!") endif() +string(TOUPPER "${LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING}" uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING) + +if( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "COVERAGE" ) + set( ENABLE_DEBUGLOC_COVERAGE_TRACKING 1 ) +elseif( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "COVERAGE_AND_ORIGIN" ) + message(FATAL_ERROR "\"COVERAGE_AND_ORIGIN\" setting for LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING currently unimplemented.") +elseif( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "DISABLED" OR NOT DEFINED LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING ) + # The DISABLED setting is default and requires no additional defines. +else() + message(FATAL_ERROR "Unknown value for LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING: \"${LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING}\"!") +endif() + if( LLVM_REVERSE_ITERATION ) set( LLVM_ENABLE_REVERSE_ITERATION 1 ) endif() diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst index 2a80813999ea1e..304e22759770d9 100644 --- a/llvm/docs/CMake.rst +++ b/llvm/docs/CMake.rst @@ -475,6 +475,17 @@ enabled sub-projects. Nearly all of these variable names begin with **LLVM_ENABLE_BINDINGS**:BOOL If disabled, do not try to build the OCaml bindings. +**LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING**:STRING + Enhances Debugify's ability to detect line number errors by storing extra + information inside Instructions, removing false positives from Debugify's + results at the cost of performance. Allowed values are `DISABLED` (default), + `COVERAGE`, and `COVERAGE_AND_ORIGIN`. `COVERAGE` tracks whether and why a + line number was intentionally dropped or not generated for an instruction, + allowing Debugify to avoid reporting these as errors. `COVERAGE_AND_ORIGIN` + additionally stores a stacktrace of the point where each DebugLoc is + unintentionally dropped, allowing for much easier bug triaging at the cost of + a ~10x performance slowdown. + **LLVM_ENABLE_DIA_SDK**:BOOL Enable building with MSVC DIA SDK for PDB debugging support. Available only with MSVC. Defaults to ON. diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake index ff30741c8f360a..388ce1e8f74e3e 100644 --- a/llvm/include/llvm/Config/config.h.cmake +++ b/llvm/include/llvm/Config/config.h.cmake @@ -19,6 +19,10 @@ /* Define to 1 to enable crash memory dumps, and to 0 otherwise. */ #cmakedefine01 LLVM_ENABLE_CRASH_DUMPS +/* Define to 1 to enable expensive checks for debug location coverage checking, + and to 0 otherwise. */ +#cmakedefine01 ENABLE_DEBUGLOC_COVERAGE_TRACKING + /* Define to 1 to prefer forward slashes on Windows, and to 0 prefer backslashes. */ #cmakedefine01 LLVM_WINDOWS_PREFER_FORWARD_SLASH >From abab69ae42bf5650d6a8fff5a22341ff32effe57 Mon Sep 17 00:00:00 2001 From: Stephen Tozer <stephen.to...@sony.com> Date: Wed, 4 Sep 2024 12:23:52 +0100 Subject: [PATCH 2/3] Add conditionally-enabled DebugLocKinds --- clang/lib/CodeGen/BackendUtil.cpp | 16 +++++ llvm/include/llvm/IR/DebugLoc.h | 74 +++++++++++++++++++++- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 5 ++ llvm/lib/IR/DebugInfo.cpp | 4 +- llvm/lib/IR/DebugLoc.cpp | 16 +++++ llvm/lib/Transforms/Utils/Debugify.cpp | 19 ++++-- 6 files changed, 124 insertions(+), 10 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index e765bbf637a661..20653daff7d4ae 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -911,6 +911,22 @@ void EmitAssemblyHelper::RunOptimizationPipeline( Debugify.setOrigDIVerifyBugsReportFilePath( CodeGenOpts.DIBugsReportFilePath); Debugify.registerCallbacks(PIC, MAM); + +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + // If we're using debug location coverage tracking, mark all the + // instructions coming out of the frontend without a DebugLoc as being + // intentional line-zero locations, to prevent both those instructions and + // new instructions that inherit their location from being treated as + // incorrectly empty locations. + for (Function &F : *TheModule) { + if (!F.getSubprogram()) + continue; + for (BasicBlock &BB : F) + for (Instruction &I : BB) + if (!I.getDebugLoc()) + I.setDebugLoc(DebugLoc::getLineZero()); + } +#endif } // Attempt to load pass plugins and register their callbacks with PB. for (auto &PluginFN : CodeGenOpts.PassPlugins) { diff --git a/llvm/include/llvm/IR/DebugLoc.h b/llvm/include/llvm/IR/DebugLoc.h index c22d3e9b10d27f..ae5f9d72c97e26 100644 --- a/llvm/include/llvm/IR/DebugLoc.h +++ b/llvm/include/llvm/IR/DebugLoc.h @@ -14,6 +14,7 @@ #ifndef LLVM_IR_DEBUGLOC_H #define LLVM_IR_DEBUGLOC_H +#include "llvm/Config/config.h" #include "llvm/IR/TrackingMDRef.h" #include "llvm/Support/DataTypes.h" @@ -22,6 +23,67 @@ namespace llvm { class LLVMContext; class raw_ostream; class DILocation; + class Function; + +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + // Used to represent different "kinds" of DebugLoc, expressing that a DebugLoc + // is either ordinary, containing a valid DILocation, or otherwise describing + // the reason why the DebugLoc does not contain a valid DILocation. + enum class DebugLocKind : uint8_t { + // DebugLoc is expected to contain a valid DILocation. + Normal, + // DebugLoc intentionally does not have a valid DILocation; may be for a + // compiler-generated instruction, or an explicitly dropped location. + LineZero, + // DebugLoc does not have a known or currently knowable source location, + // e.g. the attribution is ambiguous in a way that can't be represented, or + // determining the correct location is complicated and requires future + // developer effort. + Unknown, + // DebugLoc is attached to an instruction that we don't expect to be + // emitted, and so can omit a valid DILocation; we don't expect to ever try + // and emit these into the line table, and trying to do so is a sign that + // something has gone wrong (most likely a DebugLoc leaking from a transient + // compiler-generated instruction). + Temporary + }; + + // Extends TrackingMDNodeRef to also store a DebugLocKind, allowing Debugify + // to ignore intentionally-empty DebugLocs. + class DILocAndCoverageTracking : public TrackingMDNodeRef { + public: + DebugLocKind Kind; + // Default constructor for empty DebugLocs. + DILocAndCoverageTracking() + : TrackingMDNodeRef(nullptr), Kind(DebugLocKind::Normal) {} + // Valid or nullptr MDNode*, normal DebugLocKind. + DILocAndCoverageTracking(const MDNode *Loc) + : TrackingMDNodeRef(const_cast<MDNode *>(Loc)), + Kind(DebugLocKind::Normal) {} + DILocAndCoverageTracking(const DILocation *Loc); + // Explicit DebugLocKind, which always means a nullptr MDNode*. + DILocAndCoverageTracking(DebugLocKind Kind) + : TrackingMDNodeRef(nullptr), Kind(Kind) {} + }; + template <> struct simplify_type<DILocAndCoverageTracking> { + using SimpleType = MDNode *; + + static MDNode *getSimplifiedValue(DILocAndCoverageTracking &MD) { + return MD.get(); + } + }; + template <> struct simplify_type<const DILocAndCoverageTracking> { + using SimpleType = MDNode *; + + static MDNode *getSimplifiedValue(const DILocAndCoverageTracking &MD) { + return MD.get(); + } + }; + + using DebugLocTrackingRef = DILocAndCoverageTracking; +#else + using DebugLocTrackingRef = TrackingMDNodeRef; +#endif // ENABLE_DEBUGLOC_COVERAGE_TRACKING /// A debug info location. /// @@ -31,7 +93,8 @@ namespace llvm { /// To avoid extra includes, \a DebugLoc doubles the \a DILocation API with a /// one based on relatively opaque \a MDNode pointers. class DebugLoc { - TrackingMDNodeRef Loc; + + DebugLocTrackingRef Loc; public: DebugLoc() = default; @@ -47,6 +110,15 @@ namespace llvm { /// IR. explicit DebugLoc(const MDNode *N); +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + DebugLoc(DebugLocKind Kind) : Loc(Kind) {} + DebugLocKind getKind() const { return Loc.Kind; } +#endif + + static DebugLoc getTemporary(); + static DebugLoc getUnknown(); + static DebugLoc getLineZero(); + /// Get the underlying \a DILocation. /// /// \pre !*this or \c isa<DILocation>(getAsMDNode()). diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index f88653146cc6ff..4ba8262259b112 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/config.h" #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/IR/Constants.h" @@ -2080,6 +2081,10 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { } if (!DL) { + // FIXME: We could assert that `DL.getKind() != DebugLocKind::Temporary` + // here, or otherwise record any temporary DebugLocs seen to ensure that + // transient compiler-generated instructions aren't leaking their DLs to + // other instructions. // We have an unspecified location, which might want to be line 0. // If we have already emitted a line-0 record, don't repeat it. if (LastAsmLine == 0) diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 7fa1f9696d43b2..86ac46540c5ef9 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -979,7 +979,7 @@ void Instruction::dropLocation() { } if (!MayLowerToCall) { - setDebugLoc(DebugLoc()); + setDebugLoc(DebugLoc::getLineZero()); return; } @@ -998,7 +998,7 @@ void Instruction::dropLocation() { // // One alternative is to set a line 0 location with the existing scope and // inlinedAt info. The location might be sensitive to when inlining occurs. - setDebugLoc(DebugLoc()); + setDebugLoc(DebugLoc::getLineZero()); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp index bdea52180f74ae..501eafd0175b7b 100644 --- a/llvm/lib/IR/DebugLoc.cpp +++ b/llvm/lib/IR/DebugLoc.cpp @@ -11,6 +11,22 @@ #include "llvm/IR/DebugInfo.h" using namespace llvm; +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING +DILocAndCoverageTracking::DILocAndCoverageTracking(const DILocation *L) + : TrackingMDNodeRef(const_cast<DILocation *>(L)), + Kind(DebugLocKind::Normal) {} + +DebugLoc DebugLoc::getTemporary() { return DebugLoc(DebugLocKind::Temporary); } +DebugLoc DebugLoc::getUnknown() { return DebugLoc(DebugLocKind::Unknown); } +DebugLoc DebugLoc::getLineZero() { return DebugLoc(DebugLocKind::LineZero); } + +#else + +DebugLoc DebugLoc::getTemporary() { return DebugLoc(); } +DebugLoc DebugLoc::getUnknown() { return DebugLoc(); } +DebugLoc DebugLoc::getLineZero() { return DebugLoc(); } +#endif // ENABLE_DEBUGLOC_COVERAGE_TRACKING + //===----------------------------------------------------------------------===// // DebugLoc Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp index fcc82eadac36cf..f9f85d05ab45c5 100644 --- a/llvm/lib/Transforms/Utils/Debugify.cpp +++ b/llvm/lib/Transforms/Utils/Debugify.cpp @@ -292,6 +292,16 @@ bool llvm::stripDebugifyMetadata(Module &M) { return Changed; } +bool hasLoc(const Instruction &I) { + const DILocation *Loc = I.getDebugLoc().get(); +#if ENABLE_DEBUGLOC_COVERAGE_TRACKING + DebugLocKind Kind = I.getDebugLoc().getKind(); + return Loc || Kind != DebugLocKind::Normal; +#else + return Loc; +#endif +} + bool llvm::collectDebugInfoMetadata(Module &M, iterator_range<Module::iterator> Functions, DebugInfoPerPass &DebugInfoBeforePass, @@ -364,9 +374,7 @@ bool llvm::collectDebugInfoMetadata(Module &M, LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); DebugInfoBeforePass.InstToDelete.insert({&I, &I}); - const DILocation *Loc = I.getDebugLoc().get(); - bool HasLoc = Loc != nullptr; - DebugInfoBeforePass.DILocations.insert({&I, HasLoc}); + DebugInfoBeforePass.DILocations.insert({&I, hasLoc(I)}); } } } @@ -609,10 +617,7 @@ bool llvm::checkDebugInfoMetadata(Module &M, LLVM_DEBUG(dbgs() << " Collecting info for inst: " << I << '\n'); - const DILocation *Loc = I.getDebugLoc().get(); - bool HasLoc = Loc != nullptr; - - DebugInfoAfterPass.DILocations.insert({&I, HasLoc}); + DebugInfoAfterPass.DILocations.insert({&I, hasLoc(I)}); } } } >From c01c61535b0d1629ab363bc60c742ba4ab307cbf Mon Sep 17 00:00:00 2001 From: Stephen Tozer <stephen.to...@sony.com> Date: Thu, 26 Sep 2024 15:32:31 +0100 Subject: [PATCH 3/3] Split LineZero into subcategories, use getDropped on empty debuglocs --- clang/lib/CodeGen/BackendUtil.cpp | 6 +++--- llvm/include/llvm/IR/DebugLoc.h | 31 +++++++++++++++++++------------ llvm/lib/IR/DebugInfo.cpp | 8 +++++--- llvm/lib/IR/DebugLoc.cpp | 8 ++++---- 4 files changed, 31 insertions(+), 22 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 20653daff7d4ae..396d0f974bcae1 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -915,8 +915,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline( #if ENABLE_DEBUGLOC_COVERAGE_TRACKING // If we're using debug location coverage tracking, mark all the // instructions coming out of the frontend without a DebugLoc as being - // intentional line-zero locations, to prevent both those instructions and - // new instructions that inherit their location from being treated as + // compiler-generated, to prevent both those instructions and new + // instructions that inherit their location from being treated as // incorrectly empty locations. for (Function &F : *TheModule) { if (!F.getSubprogram()) @@ -924,7 +924,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( for (BasicBlock &BB : F) for (Instruction &I : BB) if (!I.getDebugLoc()) - I.setDebugLoc(DebugLoc::getLineZero()); + I.setDebugLoc(DebugLoc::getCompilerGenerated()); } #endif } diff --git a/llvm/include/llvm/IR/DebugLoc.h b/llvm/include/llvm/IR/DebugLoc.h index ae5f9d72c97e26..e6af442fc266be 100644 --- a/llvm/include/llvm/IR/DebugLoc.h +++ b/llvm/include/llvm/IR/DebugLoc.h @@ -26,19 +26,25 @@ namespace llvm { class Function; #if ENABLE_DEBUGLOC_COVERAGE_TRACKING - // Used to represent different "kinds" of DebugLoc, expressing that a DebugLoc - // is either ordinary, containing a valid DILocation, or otherwise describing - // the reason why the DebugLoc does not contain a valid DILocation. + // Used to represent different "kinds" of DebugLoc, expressing that the + // instruction it is part of is either normal and should contain a valid + // DILocation, or otherwise describing the reason why the instruction does + // not contain a valid DILocation. enum class DebugLocKind : uint8_t { - // DebugLoc is expected to contain a valid DILocation. + // The instruction is expected to contain a valid DILocation. Normal, - // DebugLoc intentionally does not have a valid DILocation; may be for a - // compiler-generated instruction, or an explicitly dropped location. - LineZero, - // DebugLoc does not have a known or currently knowable source location, - // e.g. the attribution is ambiguous in a way that can't be represented, or - // determining the correct location is complicated and requires future - // developer effort. + // The instruction is compiler-generated, i.e. it is not associated with any + // line in the original source. + CompilerGenerated, + // The instruction has intentionally had its source location removed, + // typically because it was moved outside of its original control-flow and + // presenting the prior source location would be misleading for debuggers + // or profilers. + Dropped, + // The instruction does not have a known or currently knowable source + // location, e.g. the attribution is ambiguous in a way that can't be + // represented, or determining the correct location is complicated and + // requires future developer effort. Unknown, // DebugLoc is attached to an instruction that we don't expect to be // emitted, and so can omit a valid DILocation; we don't expect to ever try @@ -117,7 +123,8 @@ namespace llvm { static DebugLoc getTemporary(); static DebugLoc getUnknown(); - static DebugLoc getLineZero(); + static DebugLoc getCompilerGenerated(); + static DebugLoc getDropped(); /// Get the underlying \a DILocation. /// diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 86ac46540c5ef9..f59372806f9c17 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -966,8 +966,10 @@ void Instruction::updateLocationAfterHoist() { dropLocation(); } void Instruction::dropLocation() { const DebugLoc &DL = getDebugLoc(); - if (!DL) + if (!DL) { + setDebugLoc(DebugLoc::getDropped()); return; + } // If this isn't a call, drop the location to allow a location from a // preceding instruction to propagate. @@ -979,7 +981,7 @@ void Instruction::dropLocation() { } if (!MayLowerToCall) { - setDebugLoc(DebugLoc::getLineZero()); + setDebugLoc(DebugLoc::getDropped()); return; } @@ -998,7 +1000,7 @@ void Instruction::dropLocation() { // // One alternative is to set a line 0 location with the existing scope and // inlinedAt info. The location might be sensitive to when inlining occurs. - setDebugLoc(DebugLoc::getLineZero()); + setDebugLoc(DebugLoc::getDropped()); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/DebugLoc.cpp b/llvm/lib/IR/DebugLoc.cpp index 501eafd0175b7b..87d14a05718a8b 100644 --- a/llvm/lib/IR/DebugLoc.cpp +++ b/llvm/lib/IR/DebugLoc.cpp @@ -18,13 +18,13 @@ DILocAndCoverageTracking::DILocAndCoverageTracking(const DILocation *L) DebugLoc DebugLoc::getTemporary() { return DebugLoc(DebugLocKind::Temporary); } DebugLoc DebugLoc::getUnknown() { return DebugLoc(DebugLocKind::Unknown); } -DebugLoc DebugLoc::getLineZero() { return DebugLoc(DebugLocKind::LineZero); } - +DebugLoc DebugLoc::getCompilerGenerated() { return DebugLoc(DebugLocKind::CompilerGenerated); } +DebugLoc DebugLoc::getDropped() { return DebugLoc(DebugLocKind::Dropped); } #else - DebugLoc DebugLoc::getTemporary() { return DebugLoc(); } DebugLoc DebugLoc::getUnknown() { return DebugLoc(); } -DebugLoc DebugLoc::getLineZero() { return DebugLoc(); } +DebugLoc DebugLoc::getCompilerGenerated() { return DebugLoc(); } +DebugLoc DebugLoc::getDropped() { return DebugLoc(); } #endif // ENABLE_DEBUGLOC_COVERAGE_TRACKING //===----------------------------------------------------------------------===// _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits