jlebar created this revision. jlebar added a reviewer: rnk. jlebar added subscribers: tra, cfe-commits.
Previously, when you did something not allowed in a host+device function and then caused it to be codegen'ed, we would print out an error telling you that you did something bad, but we wouldn't tell you how we decided that the function needed to be codegen'ed. This change causes us to print out a callstack when emitting deferred errors. This is immensely helpful when debugging highly-templated code, where it's often unclear how a function became known-emitted. We only print the callstack once per function, after we print all the deferred errors. This patch also switches all of our hashtables to using canonical FunctionDecls instead of regular FunctionDecls. This prevents a number of bugs, some of which are caught by tests added here, in which we assume that two FDs for the same function have the same pointer value. https://reviews.llvm.org/D25704 Files: clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Sema/Sema.h clang/lib/Sema/SemaCUDA.cpp clang/test/SemaCUDA/bad-calls-on-same-line.cu clang/test/SemaCUDA/call-device-fn-from-host.cu clang/test/SemaCUDA/call-host-fn-from-device.cu clang/test/SemaCUDA/call-stack-for-deferred-err.cu clang/test/SemaCUDA/exceptions-host-device.cu clang/test/SemaCUDA/no-call-stack-for-immediate-errs.cu clang/test/SemaCUDA/trace-through-global.cu
Index: clang/test/SemaCUDA/trace-through-global.cu =================================================================== --- clang/test/SemaCUDA/trace-through-global.cu +++ clang/test/SemaCUDA/trace-through-global.cu @@ -35,10 +35,16 @@ template <typename T> void launch_kernel() { kernel<<<0, 0>>>(T()); - hd1(); - hd3(T()); + + // Notice that these two diagnostics are different: Because the call to hd1 + // is not dependent on T, the call to hd1 comes from 'launch_kernel', while + // the call to hd3, being dependent, comes from 'launch_kernel<int>'. + hd1(); // expected-note {{called by 'launch_kernel'}} + hd3(T()); // expected-note {{called by 'launch_kernel<int>'}} } void host_fn() { launch_kernel<int>(); + // expected-note@-1 {{called by 'host_fn'}} + // expected-note@-2 {{called by 'host_fn'}} } Index: clang/test/SemaCUDA/no-call-stack-for-immediate-errs.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/no-call-stack-for-immediate-errs.cu @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -fcuda-is-device -fsyntax-only -verify %s + +#include "Inputs/cuda.h" + +// Here we should dump an error about the VLA in device_fn, but we should not +// print a callstack indicating how device_fn becomes known-emitted, because +// it's an error to use a VLA in any __device__ function, even one that doesn't +// get emitted. 
+ +inline __device__ void device_fn(int n); +inline __device__ void device_fn2() { device_fn(42); } + +__global__ void kernel() { device_fn2(); } + +inline __device__ void device_fn(int n) { + int vla[n]; // expected-error {{variable-length array}} +} Index: clang/test/SemaCUDA/exceptions-host-device.cu =================================================================== --- clang/test/SemaCUDA/exceptions-host-device.cu +++ clang/test/SemaCUDA/exceptions-host-device.cu @@ -36,3 +36,6 @@ #endif } __device__ void call_hd3() { hd3(); } +#ifndef HOST +// expected-note@-2 {{called by 'call_hd3'}} +#endif Index: clang/test/SemaCUDA/call-stack-for-deferred-err.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/call-stack-for-deferred-err.cu @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -fcuda-is-device -fsyntax-only -verify %s + +#include "Inputs/cuda.h" + +// We should emit an error for hd_fn's use of a VLA. This would have been +// legal if hd_fn were never codegen'ed on the device, so we should also print +// out a callstack showing how we determine that hd_fn is known-emitted. +// +// Compare to no-call-stack-for-immediate-errs.cu. 
+ +inline __host__ __device__ void hd_fn(int n); +inline __device__ void device_fn2() { hd_fn(42); } // expected-note {{called by 'device_fn2'}} + +__global__ void kernel() { device_fn2(); } // expected-note {{called by 'kernel'}} + +inline __host__ __device__ void hd_fn(int n) { + int vla[n]; // expected-error {{variable-length array}} +} Index: clang/test/SemaCUDA/call-host-fn-from-device.cu =================================================================== --- clang/test/SemaCUDA/call-host-fn-from-device.cu +++ clang/test/SemaCUDA/call-host-fn-from-device.cu @@ -1,5 +1,5 @@ // RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown -fcuda-is-device \ -// RUN: -emit-llvm -o /dev/null -verify +// RUN: -emit-llvm -o /dev/null -verify -verify-ignore-unexpected=note // Note: This test won't work with -fsyntax-only, because some of these errors // are emitted during codegen. Index: clang/test/SemaCUDA/call-device-fn-from-host.cu =================================================================== --- clang/test/SemaCUDA/call-device-fn-from-host.cu +++ clang/test/SemaCUDA/call-device-fn-from-host.cu @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - -verify +// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \ +// RUN: -verify -verify-ignore-unexpected=note // Note: This test won't work with -fsyntax-only, because some of these errors // are emitted during codegen. 
Index: clang/test/SemaCUDA/bad-calls-on-same-line.cu =================================================================== --- clang/test/SemaCUDA/bad-calls-on-same-line.cu +++ clang/test/SemaCUDA/bad-calls-on-same-line.cu @@ -35,5 +35,7 @@ void host_fn() { hd<int>(); hd<double>(); // expected-note {{function template specialization 'hd<double>'}} + // expected-note@-1 {{called by 'host_fn'}} hd<float>(); // expected-note {{function template specialization 'hd<float>'}} + // expected-note@-1 {{called by 'host_fn'}} } Index: clang/lib/Sema/SemaCUDA.cpp =================================================================== --- clang/lib/Sema/SemaCUDA.cpp +++ clang/lib/Sema/SemaCUDA.cpp @@ -488,22 +488,6 @@ NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context)); } -Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc, - unsigned DiagID, FunctionDecl *Fn, - Sema &S) { - switch (K) { - case K_Nop: - break; - case K_Immediate: - ImmediateDiagBuilder.emplace(S.Diag(Loc, DiagID)); - break; - case K_Deferred: - assert(Fn && "Must have a function to attach the deferred diag to."); - PartialDiagInfo.emplace(S, Loc, S.PDiag(DiagID), Fn); - break; - } -} - // In CUDA, there are some constructs which may appear in semantically-valid // code, but trigger errors if we ever generate code for the function in which // they appear. Essentially every construct you're not allowed to use on the @@ -528,6 +512,54 @@ // until we discover that the function is known-emitted, at which point we take // it out of this map and emit the diagnostic. 
+Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc, + unsigned DiagID, FunctionDecl *Fn, + Sema &S) + : S(S), Loc(Loc), DiagID(DiagID), Fn(Fn), + ShowCallStack(K == K_ImmediateWithCallStack || K == K_Deferred) { + switch (K) { + case K_Nop: + break; + case K_Immediate: + case K_ImmediateWithCallStack: + ImmediateDiag.emplace(S.Diag(Loc, DiagID)); + break; + case K_Deferred: + assert(Fn && "Must have a function to attach the deferred diag to."); + PartialDiag.emplace(S.PDiag(DiagID)); + break; + } +} + +// Print notes showing how we can reach FD starting from an a priori +// known-callable function. +static void EmitCallStackNotes(Sema &S, FunctionDecl *FD) { + auto FnIt = S.CUDAKnownEmittedFns.find(FD); + while (FnIt != S.CUDAKnownEmittedFns.end()) { + DiagnosticBuilder Builder( + S.Diags.Report(FnIt->second.Loc, diag::note_called_by)); + Builder << FnIt->second.FD; + Builder.setForceEmit(); + + FnIt = S.CUDAKnownEmittedFns.find(FnIt->second.FD); + } +} + +Sema::CUDADiagBuilder::~CUDADiagBuilder() { + if (ImmediateDiag) { + // Emit our diagnostic and, if it was a warning or error, output a callstack + // if Fn isn't a priori known-emitted. + bool IsWarningOrError = S.getDiagnostics().getDiagnosticLevel( + DiagID, Loc) >= DiagnosticsEngine::Warning; + ImmediateDiag.reset(); // Emit the immediate diag. + if (IsWarningOrError && ShowCallStack) + EmitCallStackNotes(S, Fn); + } else if (PartialDiag) { + assert(ShowCallStack && "Must always show call stack for deferred diags."); + S.CUDADeferredDiags[Fn].push_back({Loc, std::move(*PartialDiag)}); + } +} + // Do we know that we will eventually codegen the given function? static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) { // Templates are emitted when they're instantiated. @@ -568,7 +600,7 @@ // mode until the function is known-emitted. if (getLangOpts().CUDAIsDevice) { return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) - ? CUDADiagBuilder::K_Immediate + ? 
CUDADiagBuilder::K_ImmediateWithCallStack : CUDADiagBuilder::K_Deferred; } return CUDADiagBuilder::K_Nop; @@ -596,7 +628,7 @@ return CUDADiagBuilder::K_Nop; return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext)) - ? CUDADiagBuilder::K_Immediate + ? CUDADiagBuilder::K_ImmediateWithCallStack : CUDADiagBuilder::K_Deferred; default: return CUDADiagBuilder::K_Nop; @@ -612,63 +644,81 @@ auto It = S.CUDADeferredDiags.find(FD); if (It == S.CUDADeferredDiags.end()) return; + bool HasWarningOrError = false; for (PartialDiagnosticAt &PDAt : It->second) { const SourceLocation &Loc = PDAt.first; const PartialDiagnostic &PD = PDAt.second; + HasWarningOrError |= S.getDiagnostics().getDiagnosticLevel( + PD.getDiagID(), Loc) >= DiagnosticsEngine::Warning; DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID())); Builder.setForceEmit(); PD.Emit(Builder); } S.CUDADeferredDiags.erase(It); + + // FIXME: Should this be called after every warning/error emitted in the loop + // above, instead of just once per function? That would be consistent with + // how we handle immediate errors, but it also seems like a bit much. + if (HasWarningOrError) + EmitCallStackNotes(S, FD); } // Indicate that this function (and thus everything it transtively calls) will // be codegen'ed, and emit any deferred diagnostics on this function and its // (transitive) callees. -static void MarkKnownEmitted(Sema &S, FunctionDecl *FD) { +static void MarkKnownEmitted(Sema &S, FunctionDecl *OrigCaller, + FunctionDecl *OrigCallee, SourceLocation OrigLoc) { // Nothing to do if we already know that FD is emitted. - if (IsKnownEmitted(S, FD)) { - assert(!S.CUDACallGraph.count(FD)); + if (IsKnownEmitted(S, OrigCallee)) { + assert(!S.CUDACallGraph.count(OrigCallee)); return; } - // We've just discovered that FD is known-emitted. Walk our call graph to see - // what else we can now discover also must be emitted. 
- llvm::SmallVector<FunctionDecl *, 4> Worklist = {FD}; - llvm::SmallSet<FunctionDecl *, 4> Seen; - Seen.insert(FD); + // We've just discovered that OrigCallee is known-emitted. Walk our call + // graph to see what else we can now discover also must be emitted. + + struct CallInfo { + FunctionDecl *Caller; + FunctionDecl *Callee; + SourceLocation Loc; + }; + llvm::SmallVector<CallInfo, 4> Worklist = {{OrigCaller, OrigCallee, OrigLoc}}; + llvm::SmallSet<CanonicalDeclPtr<FunctionDecl>, 4> Seen; + Seen.insert(OrigCallee); while (!Worklist.empty()) { - FunctionDecl *Caller = Worklist.pop_back_val(); - assert(!IsKnownEmitted(S, Caller) && + CallInfo C = Worklist.pop_back_val(); + assert(!IsKnownEmitted(S, C.Callee) && "Worklist should not contain known-emitted functions."); - S.CUDAKnownEmittedFns.insert(Caller); - EmitDeferredDiags(S, Caller); + S.CUDAKnownEmittedFns[C.Callee] = {C.Caller, C.Loc}; + EmitDeferredDiags(S, C.Callee); // If this is a template instantiation, explore its callgraph as well: // Non-dependent calls are part of the template's callgraph, while dependent // calls are part of to the instantiation's call graph. - if (auto *Templ = Caller->getPrimaryTemplate()) { + if (auto *Templ = C.Callee->getPrimaryTemplate()) { FunctionDecl *TemplFD = Templ->getAsFunction(); if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) { Seen.insert(TemplFD); - Worklist.push_back(TemplFD); + Worklist.push_back( + {/* Caller = */ C.Caller, /* Callee = */ TemplFD, C.Loc}); } } - // Add all functions called by Caller to our worklist. - auto CGIt = S.CUDACallGraph.find(Caller); + // Add all functions called by Callee to our worklist. 
+ auto CGIt = S.CUDACallGraph.find(C.Callee); if (CGIt == S.CUDACallGraph.end()) continue; - for (FunctionDecl *Callee : CGIt->second) { - if (Seen.count(Callee) || IsKnownEmitted(S, Callee)) + for (Sema::FunctionDeclAndLoc FDL : CGIt->second) { + if (Seen.count(FDL.FD) || IsKnownEmitted(S, FDL.FD)) continue; - Seen.insert(Callee); - Worklist.push_back(Callee); + Seen.insert(FDL.FD); + Worklist.push_back( + {/* Caller = */ C.Callee, /* Callee = */ FDL.FD, FDL.Loc}); } - // Caller is now known-emitted, so we no longer need to maintain its list of - // callees in CUDACallGraph. + // C.Callee is now known-emitted, so we no longer need to maintain its list + // of callees in CUDACallGraph. S.CUDACallGraph.erase(CGIt); } } @@ -686,16 +736,16 @@ // Otherwise, mark the call in our call graph so we can traverse it later. bool CallerKnownEmitted = IsKnownEmitted(*this, Caller); if (CallerKnownEmitted) - MarkKnownEmitted(*this, Callee); + MarkKnownEmitted(*this, Caller, Callee, Loc); else { // If we have // host fn calls kernel fn calls host+device, // the HD function does not get instantiated on the host. We model this by // omitting at the call to the kernel from the callgraph. This ensures // that, when compiling for host, only HD functions actually called from the // host get marked as known-emitted. if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) - CUDACallGraph[Caller].insert(Callee); + CUDACallGraph[Caller].insert(FunctionDeclAndLoc{Callee, Loc}); } CUDADiagBuilder::Kind DiagKind = [&] { @@ -707,7 +757,7 @@ // If we know the caller will be emitted, we know this wrong-side call // will be emitted, so it's an immediate error. Otherwise, defer the // error until we know the caller is emitted. - return CallerKnownEmitted ? CUDADiagBuilder::K_Immediate + return CallerKnownEmitted ? 
CUDADiagBuilder::K_ImmediateWithCallStack : CUDADiagBuilder::K_Deferred; default: return CUDADiagBuilder::K_Nop; @@ -729,7 +779,8 @@ CUDADiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl, Caller, *this) << Callee; - return DiagKind != CUDADiagBuilder::K_Immediate; + return DiagKind != CUDADiagBuilder::K_Immediate && + DiagKind != CUDADiagBuilder::K_ImmediateWithCallStack; } void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -9249,26 +9249,48 @@ /// Diagnostics that are emitted only if we discover that the given function /// must be codegen'ed. Because handling these correctly adds overhead to /// compilation, this is currently only enabled for CUDA compilations. - llvm::DenseMap<const FunctionDecl *, std::vector<PartialDiagnosticAt>> + llvm::DenseMap<CanonicalDeclPtr<FunctionDecl>, + std::vector<PartialDiagnosticAt>> CUDADeferredDiags; /// FunctionDecls plus raw encodings of SourceLocations for which /// CheckCUDACall has emitted a (maybe deferred) "bad call" diagnostic. We /// use this to avoid emitting the same deferred diag twice. - llvm::DenseSet<std::pair<FunctionDecl *, unsigned>> LocsWithCUDACallDiags; + llvm::DenseSet<std::pair<CanonicalDeclPtr<FunctionDecl>, unsigned>> + LocsWithCUDACallDiags; - /// The set of CUDA functions that we've discovered must be emitted by tracing - /// the call graph. Functions that we can tell a priori must be emitted - /// aren't added to this set. - llvm::DenseSet<FunctionDecl *> CUDAKnownEmittedFns; + /// A pair of a canonical FunctionDecl and a SourceLocation. + /// + /// When FunctionDeclAndLoc is used as the key of a DenseMap, its + /// SourceLocation is ignored. 
+ struct FunctionDeclAndLoc { + CanonicalDeclPtr<FunctionDecl> FD; + SourceLocation Loc; + }; + + /// An inverse call graph, mapping known-emitted functions to one of their + /// known-emitted callers (plus the location of the call). + /// + /// Functions that we can tell a priori must be emitted aren't added to this + /// map. + llvm::DenseMap</* Callee = */ CanonicalDeclPtr<FunctionDecl>, + /* Caller = */ FunctionDeclAndLoc> + CUDAKnownEmittedFns; /// A partial call graph maintained during CUDA compilation to support - /// deferred diagnostics. Specifically, functions are only added here if, at - /// the time they're added, they are not known-emitted. As soon as we - /// discover that a function is known-emitted, we remove it and everything it - /// transitively calls from this set and add those functions to - /// CUDAKnownEmittedFns. - llvm::DenseMap<FunctionDecl *, llvm::SetVector<FunctionDecl *>> CUDACallGraph; + /// deferred diagnostics. + /// + /// Functions are only added here if, at the time they're considered, they are + /// not known-emitted. As soon as we discover that a function is + /// known-emitted, we remove it and everything it transitively calls from this + /// set and add those functions to CUDAKnownEmittedFns. + /// + /// FunctionDeclAndLoc's DenseMap configuration ignores the SourceLocation, so + /// only the first of Caller's calls to Callee will be stored in the + /// SetVector. + llvm::DenseMap</* Caller = */ CanonicalDeclPtr<FunctionDecl>, + /* Callees = */ llvm::SetVector<FunctionDeclAndLoc>> + CUDACallGraph; /// Diagnostic builder for CUDA errors which may or may not be deferred. /// @@ -9291,13 +9313,19 @@ K_Nop, /// Emit the diagnostic immediately (i.e., behave like Sema::Diag()). K_Immediate, + /// Emit the diagnostic immediately, and, if it's a warning or error, also + /// emit a call stack showing how this function can be reached by an a + /// priori known-emitted function. 
+ K_ImmediateWithCallStack, /// Create a deferred diagnostic, which is emitted only if the function - /// it's attached to is codegen'ed. + /// it's attached to is codegen'ed. Also emit a call stack as with + /// K_ImmediateWithCallStack. K_Deferred }; CUDADiagBuilder(Kind K, SourceLocation Loc, unsigned DiagID, FunctionDecl *Fn, Sema &S); + ~CUDADiagBuilder(); /// Convertible to bool: True if we immediately emitted an error, false if /// we didn't emit an error or we created a deferred error. @@ -9309,38 +9337,29 @@ /// /// But see CUDADiagIfDeviceCode() and CUDADiagIfHostCode() -- you probably /// want to use these instead of creating a CUDADiagBuilder yourself. - operator bool() const { return ImmediateDiagBuilder.hasValue(); } + operator bool() const { return ImmediateDiag.hasValue(); } template <typename T> friend const CUDADiagBuilder &operator<<(const CUDADiagBuilder &Diag, const T &Value) { - if (Diag.ImmediateDiagBuilder.hasValue()) - *Diag.ImmediateDiagBuilder << Value; - else if (Diag.PartialDiagInfo.hasValue()) - Diag.PartialDiagInfo->PD << Value; + if (Diag.ImmediateDiag.hasValue()) + *Diag.ImmediateDiag << Value; + else if (Diag.PartialDiag.hasValue()) + *Diag.PartialDiag << Value; return Diag; } private: - struct PartialDiagnosticInfo { - PartialDiagnosticInfo(Sema &S, SourceLocation Loc, PartialDiagnostic PD, - FunctionDecl *Fn) - : S(S), Loc(Loc), PD(std::move(PD)), Fn(Fn) {} - - ~PartialDiagnosticInfo() { - S.CUDADeferredDiags[Fn].push_back({Loc, std::move(PD)}); - } - - Sema &S; - SourceLocation Loc; - PartialDiagnostic PD; - FunctionDecl *Fn; - }; + Sema &S; + SourceLocation Loc; + unsigned DiagID; + FunctionDecl *Fn; + bool ShowCallStack; // Invariant: At most one of these Optionals has a value. // FIXME: Switch these to a Variant once that exists. 
- llvm::Optional<Sema::SemaDiagnosticBuilder> ImmediateDiagBuilder; - llvm::Optional<PartialDiagnosticInfo> PartialDiagInfo; + llvm::Optional<SemaDiagnosticBuilder> ImmediateDiag; + llvm::Optional<PartialDiagnostic> PartialDiag; }; /// Creates a CUDADiagBuilder that emits the diagnostic if the current context @@ -10016,4 +10035,30 @@ } // end namespace clang +namespace llvm { +/// FunctionDeclAndLoc is hashed looking only at its +/// CanonicalDeclPtr<FunctionDecl>. +template <> struct DenseMapInfo<clang::Sema::FunctionDeclAndLoc> { + using FunctionDeclAndLoc = clang::Sema::FunctionDeclAndLoc; + using BaseInfo = DenseMapInfo<clang::CanonicalDeclPtr<clang::FunctionDecl>>; + + static FunctionDeclAndLoc getEmptyKey() { + return {BaseInfo::getEmptyKey(), clang::SourceLocation()}; + } + + static FunctionDeclAndLoc getTombstoneKey() { + return {BaseInfo::getTombstoneKey(), clang::SourceLocation()}; + } + + static unsigned getHashValue(const FunctionDeclAndLoc &FDL) { + return BaseInfo::getHashValue(FDL.FD); + } + + static bool isEqual(const FunctionDeclAndLoc &LHS, + const FunctionDeclAndLoc &RHS) { + return BaseInfo::isEqual(LHS.FD, RHS.FD); + } +}; +} // namespace llvm + #endif Index: clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6704,6 +6704,7 @@ def err_deleted_inherited_ctor_use : Error< "constructor inherited by %0 from base class %1 is implicitly deleted">; +def note_called_by : Note<"called by %0">; def err_kern_type_not_void_return : Error< "kernel function type %0 must have void return type">; def err_kern_is_nonstatic_method : Error<
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits