Author: jlebar Date: Sun Oct 16 21:25:55 2016 New Revision: 284355 URL: http://llvm.org/viewvc/llvm-project?rev=284355&view=rev Log: [CUDA] Fix false-positive in known-emitted handling.
Previously: When compiling for host, our constructed call graph went *through* kernel calls. This meant that if we had host calls kernel calls HD we would incorrectly mark the HD function as known-emitted on the host side, and thus perform host-side checks on it. Fixing this exposed another issue, wherein when marking a function as known-emitted, we also need to traverse the callgraph of its template, because non-dependent calls are attached to a function's template, not its instantiation. Added: cfe/trunk/test/SemaCUDA/trace-through-global.cu Modified: cfe/trunk/lib/Sema/SemaCUDA.cpp Modified: cfe/trunk/lib/Sema/SemaCUDA.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaCUDA.cpp?rev=284355&r1=284354&r2=284355&view=diff ============================================================================== --- cfe/trunk/lib/Sema/SemaCUDA.cpp (original) +++ cfe/trunk/lib/Sema/SemaCUDA.cpp Sun Oct 16 21:25:55 2016 @@ -644,10 +644,16 @@ static void MarkKnownEmitted(Sema &S, Fu S.CUDAKnownEmittedFns.insert(Caller); EmitDeferredDiags(S, Caller); - // Deferred diags are often emitted on the template itself, so emit those as - // well. - if (auto *Templ = Caller->getPrimaryTemplate()) - EmitDeferredDiags(S, Templ->getAsFunction()); + // If this is a template instantiation, explore its callgraph as well: + // Non-dependent calls are part of the template's callgraph, while dependent + // calls are part of to the instantiation's call graph. + if (auto *Templ = Caller->getPrimaryTemplate()) { + FunctionDecl *TemplFD = Templ->getAsFunction(); + if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) { + Seen.insert(TemplFD); + Worklist.push_back(TemplFD); + } + } // Add all functions called by Caller to our worklist. auto CGIt = S.CUDACallGraph.find(Caller); @@ -676,11 +682,21 @@ bool Sema::CheckCUDACall(SourceLocation if (!Caller) return true; + // If the caller is known-emitted, mark the callee as known-emitted. + // Otherwise, mark the call in our call graph so we can traverse it later. bool CallerKnownEmitted = IsKnownEmitted(*this, Caller); if (CallerKnownEmitted) MarkKnownEmitted(*this, Callee); - else - CUDACallGraph[Caller].insert(Callee); + else { + // If we have + // host fn calls kernel fn calls host+device, + // the HD function does not get instantiated on the host. We model this by + // omitting at the call to the kernel from the callgraph. This ensures + // that, when compiling for host, only HD functions actually called from the + // host get marked as known-emitted. + if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global) + CUDACallGraph[Caller].insert(Callee); + } CUDADiagBuilder::Kind DiagKind = [&] { switch (IdentifyCUDAPreference(Caller, Callee)) { Added: cfe/trunk/test/SemaCUDA/trace-through-global.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/trace-through-global.cu?rev=284355&view=auto ============================================================================== --- cfe/trunk/test/SemaCUDA/trace-through-global.cu (added) +++ cfe/trunk/test/SemaCUDA/trace-through-global.cu Sun Oct 16 21:25:55 2016 @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +// Check that it's OK for kernels to call HD functions that call device-only +// functions. + +#include "Inputs/cuda.h" + +__device__ void device_fn(int) {} +// expected-note@-1 {{declared here}} +// expected-note@-2 {{declared here}} + +inline __host__ __device__ int hd1() { + device_fn(0); // expected-error {{reference to __device__ function}} + return 0; +} + +inline __host__ __device__ int hd2() { + // No error here because hd2 is only referenced from a kernel. + device_fn(0); + return 0; +} + +inline __host__ __device__ void hd3(int) { + device_fn(0); // expected-error {{reference to __device__ function 'device_fn'}} +} +inline __host__ __device__ void hd3(double) {} + +inline __host__ __device__ void hd4(int) {} +inline __host__ __device__ void hd4(double) { + device_fn(0); // No error; this function is never called. +} + +__global__ void kernel(int) { hd2(); } + +template <typename T> +void launch_kernel() { + kernel<<<0, 0>>>(T()); + hd1(); + hd3(T()); +} + +void host_fn() { + launch_kernel<int>(); +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits