from:"Yaxun Liu via cfe\-commits"

r340056 - [HIP] Make __hip_gpubin_handle hidden to avoid being merged across different shared libraries

2018-08-17 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri Aug 17 10:47:31 2018
New Revision: 340056

URL: http://llvm.org/viewvc/llvm-project?rev=340056&view=rev
Log:
[HIP] Make __hip_gpubin_handle hidden to avoid being merged across different 
shared libraries

Different shared libraries contain different fat binaries, which are stored in
the global variable __hip_gpubin_handle. Since different compilation units
share the same fat binary, this variable has linkonce linkage. However, it
should not be merged across different shared libraries.

This patch sets the visibility of the global variable to hidden, which makes
it invisible outside the shared library, thereby preventing it from being
merged.

Differential Revision: https://reviews.llvm.org/D50596

Modified:
cfe/trunk/lib/CodeGen/CGCUDANV.cpp
cfe/trunk/test/CodeGenCUDA/device-stub.cu

Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=340056&r1=340055&r2=340056&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Fri Aug 17 10:47:31 2018
@@ -459,6 +459,8 @@ llvm::Function *CGNVCUDARuntime::makeMod
 /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
 "__hip_gpubin_handle");
 GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity());
+// Prevent the weak symbol in different shared libraries being merged.
+GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
 Address GpuBinaryAddr(
 GpuBinaryHandle,
 CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));

Modified: cfe/trunk/test/CodeGenCUDA/device-stub.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-stub.cu?rev=340056&r1=340055&r2=340056&view=diff
==
--- cfe/trunk/test/CodeGenCUDA/device-stub.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/device-stub.cu Fri Aug 17 10:47:31 2018
@@ -80,7 +80,7 @@ void use_pointers() {
 // HIP-SAME: section ".hipFatBinSegment"
 // * variable to save GPU binary handle after initialization
 // CUDANORDC: @__[[PREFIX]]_gpubin_handle = internal global i8** null
-// HIP: @__[[PREFIX]]_gpubin_handle = linkonce global i8** null
+// HIP: @__[[PREFIX]]_gpubin_handle = linkonce hidden global i8** null
 // * constant unnamed string with NVModuleID
 // RDC: [[MODULE_ID_GLOBAL:@.*]] = private constant
 // CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r331811 - [HIP] Add hip offload kind

2018-05-08 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue May  8 14:02:12 2018
New Revision: 331811

URL: http://llvm.org/viewvc/llvm-project?rev=331811&view=rev
Log:
[HIP] Add hip offload kind

There are considerable differences in HIP action builder and action job
creation, which justify defining a separate offload kind.

Differential Revision: https://reviews.llvm.org/D46471

Modified:
cfe/trunk/include/clang/Driver/Action.h
cfe/trunk/lib/Driver/Action.cpp
cfe/trunk/lib/Driver/Compilation.cpp
cfe/trunk/lib/Driver/ToolChains/Clang.cpp

Modified: cfe/trunk/include/clang/Driver/Action.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Action.h?rev=331811&r1=331810&r2=331811&view=diff
==
--- cfe/trunk/include/clang/Driver/Action.h (original)
+++ cfe/trunk/include/clang/Driver/Action.h Tue May  8 14:02:12 2018
@@ -88,6 +88,7 @@ public:
 // The device offloading tool chains - one bit for each programming model.
 OFK_Cuda = 0x02,
 OFK_OpenMP = 0x04,
+OFK_HIP = 0x08,
   };
 
   static const char *getClassName(ActionClass AC);

Modified: cfe/trunk/lib/Driver/Action.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Action.cpp?rev=331811&r1=331810&r2=331811&view=diff
==
--- cfe/trunk/lib/Driver/Action.cpp (original)
+++ cfe/trunk/lib/Driver/Action.cpp Tue May  8 14:02:12 2018
@@ -96,6 +96,8 @@ std::string Action::getOffloadingKindPre
 return "device-cuda";
   case OFK_OpenMP:
 return "device-openmp";
+  case OFK_HIP:
+return "device-hip";
 
 // TODO: Add other programming models here.
   }
@@ -104,8 +106,13 @@ std::string Action::getOffloadingKindPre
 return {};
 
   std::string Res("host");
+  assert(!((ActiveOffloadKindMask & OFK_Cuda) &&
+   (ActiveOffloadKindMask & OFK_HIP)) &&
+ "Cannot offload CUDA and HIP at the same time");
   if (ActiveOffloadKindMask & OFK_Cuda)
 Res += "-cuda";
+  if (ActiveOffloadKindMask & OFK_HIP)
+Res += "-hip";
   if (ActiveOffloadKindMask & OFK_OpenMP)
 Res += "-openmp";
 
@@ -142,6 +149,8 @@ StringRef Action::GetOffloadKindName(Off
 return "cuda";
   case OFK_OpenMP:
 return "openmp";
+  case OFK_HIP:
+return "hip";
 
 // TODO: Add other programming models here.
   }

Modified: cfe/trunk/lib/Driver/Compilation.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Compilation.cpp?rev=331811&r1=331810&r2=331811&view=diff
==
--- cfe/trunk/lib/Driver/Compilation.cpp (original)
+++ cfe/trunk/lib/Driver/Compilation.cpp Tue May  8 14:02:12 2018
@@ -196,10 +196,10 @@ static bool ActionFailed(const Action *A
   if (FailingCommands.empty())
 return false;
 
-  // CUDA can have the same input source code compiled multiple times so do not
-  // compiled again if there are already failures. It is OK to abort the CUDA
-  // pipeline on errors.
-  if (A->isOffloading(Action::OFK_Cuda))
+  // CUDA/HIP can have the same input source code compiled multiple times so do
+  // not compiled again if there are already failures. It is OK to abort the
+  // CUDA pipeline on errors.
+  if (A->isOffloading(Action::OFK_Cuda) || A->isOffloading(Action::OFK_HIP))
 return true;
 
   for (const auto &CI : FailingCommands)

Modified: cfe/trunk/lib/Driver/ToolChains/Clang.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Clang.cpp?rev=331811&r1=331810&r2=331811&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/Clang.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/Clang.cpp Tue May  8 14:02:12 2018
@@ -131,6 +131,10 @@ forAllAssociatedToolChains(Compilation &
 Work(*C.getSingleOffloadToolChain());
   else if (JA.isDeviceOffloading(Action::OFK_Cuda))
 Work(*C.getSingleOffloadToolChain());
+  else if (JA.isHostOffloading(Action::OFK_HIP))
+Work(*C.getSingleOffloadToolChain());
+  else if (JA.isDeviceOffloading(Action::OFK_HIP))
+Work(*C.getSingleOffloadToolChain());
 
   if (JA.isHostOffloading(Action::OFK_OpenMP)) {
 auto TCs = C.getOffloadToolChains();
@@ -3105,13 +3109,14 @@ void Clang::ConstructJob(Compilation &C,
   // Check number of inputs for sanity. We need at least one input.
   assert(Inputs.size() >= 1 && "Must have at least one input.");
   const InputInfo &Input = Inputs[0];
-  // CUDA compilation may have multiple inputs (source file + results of
+  // CUDA/HIP compilation may have multiple inputs (source file + results of
   // device-side compilations). OpenMP device jobs also take the host IR as a
   // second input. All other jobs are expected to have exactly one
   // input.
   bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
+  bool IsHIP = JA.isOffloading(Action::OFK_HIP);
   bool IsOpenMPDevice = JA.isDeviceOffloading(Action::O

r331895 - [OpenCL] Fix typos in emitted enqueue kernel function names

2018-05-09 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed May  9 10:07:06 2018
New Revision: 331895

URL: http://llvm.org/viewvc/llvm-project?rev=331895&view=rev
Log:
[OpenCL] Fix typos in emitted enqueue kernel function names

Two typos: 
vaarg => vararg
get_kernel_preferred_work_group_multiple => 
get_kernel_preferred_work_group_size_multiple

Differential Revision: https://reviews.llvm.org/D46601

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=331895&r1=331894&r2=331895&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed May  9 10:07:06 2018
@@ -3164,10 +3164,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(
   return Ptr;
 };
 
-// Could have events and/or vaargs.
+// Could have events and/or varargs.
 if (E->getArg(3)->getType()->isBlockPointerType()) {
   // No events passed, but has variadic arguments.
-  Name = "__enqueue_kernel_vaargs";
+  Name = "__enqueue_kernel_varargs";
   auto Info =
   CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
   llvm::Value *Kernel =
@@ -3235,7 +3235,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(
   // Pass the number of variadics to the runtime function too.
   Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
   ArgTys.push_back(Int32Ty);
-  Name = "__enqueue_kernel_events_vaargs";
+  Name = "__enqueue_kernel_events_varargs";
 
   auto *PtrToSizeArray = CreateArrayForSizeVar(7);
   Args.push_back(PtrToSizeArray);
@@ -3276,7 +3276,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(
 CGM.CreateRuntimeFunction(
 llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, 
GenericVoidPtrTy},
 false),
-"__get_kernel_preferred_work_group_multiple_impl"),
+"__get_kernel_preferred_work_group_size_multiple_impl"),
 {Kernel, Arg}));
   }
   case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:

Modified: cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl?rev=331895&r1=331894&r2=331895&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl Wed May  9 
10:07:06 2018
@@ -88,7 +88,7 @@ kernel void device_side_enqueue(global i
   // B64: %[[TMP:.*]] = alloca [1 x i64]
   // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, 
i32 0
   // B64: store i64 256, i64* %[[TMP1]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_vaargs(
+  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], 
%struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} 
[[INVGK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ 
i32, i32 } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8 addrspace(4)*), 
i32 1,
@@ -109,7 +109,7 @@ kernel void device_side_enqueue(global i
   // B64: %[[TMP:.*]] = alloca [1 x i64]
   // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, 
i32 0
   // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_vaargs(
+  // COMMON-LABEL: call i32 @__enqueue_kernel_varargs(
   // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], 
%struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} 
[[INVGK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ 
i32, i32 } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8 addrspace(4)*), 
i32 1,
@@ -133,7 +133,7 @@ kernel void device_side_enqueue(global i
   // B64: %[[TMP:.*]] = alloca [1 x i64]
   // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, 
i32 0
   // B64: store i64 256, i64* %[[TMP1]], align 8
-  // COMMON-LABEL: call i32 @__enqueue_kernel_events_vaargs
+  // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs
   // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]],  
%struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], 
%opencl.clk_event_t{{.*}} [[EVNT]],
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} 
[[INVGK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
   // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ 
i32, i32 } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8 addrspace(4)*),

r332121 - [HIP] Let clang-offload-bundler support HIP

2018-05-11 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri May 11 12:02:18 2018
New Revision: 332121

URL: http://llvm.org/viewvc/llvm-project?rev=332121&view=rev
Log:
[HIP] Let clang-offload-bundler support HIP

When bundling/unbundling intermediate files for HIP, there may be multiple
sub archs; therefore BoundArch needs to be included in the target
and output file names for clang-offload-bundler.

Differential Revision: https://reviews.llvm.org/D46473

Modified:
cfe/trunk/lib/Driver/Driver.cpp
cfe/trunk/lib/Driver/ToolChains/Clang.cpp
cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp

Modified: cfe/trunk/lib/Driver/Driver.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=332121&r1=332120&r2=332121&view=diff
==
--- cfe/trunk/lib/Driver/Driver.cpp (original)
+++ cfe/trunk/lib/Driver/Driver.cpp Fri May 11 12:02:18 2018
@@ -3736,9 +3736,12 @@ InputInfo Driver::BuildJobsForActionNoCa
   UI.DependentToolChain->getTriple().normalize(),
   /*CreatePrefixForHost=*/true);
   auto CurI = InputInfo(
-  UA, GetNamedOutputPath(C, *UA, BaseInput, UI.DependentBoundArch,
- /*AtTopLevel=*/false, MultipleArchs,
- OffloadingPrefix),
+  UA,
+  GetNamedOutputPath(C, *UA, BaseInput, UI.DependentBoundArch,
+ /*AtTopLevel=*/false,
+ MultipleArchs ||
+ UI.DependentOffloadKind == Action::OFK_HIP,
+ OffloadingPrefix),
   BaseInput);
   // Save the unbundling result.
   UnbundlingResults.push_back(CurI);

Modified: cfe/trunk/lib/Driver/ToolChains/Clang.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Clang.cpp?rev=332121&r1=332120&r2=332121&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/Clang.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/Clang.cpp Fri May 11 12:02:18 2018
@@ -5542,6 +5542,10 @@ void OffloadBundler::ConstructJob(Compil
 Triples += Action::GetOffloadKindName(CurKind);
 Triples += '-';
 Triples += CurTC->getTriple().normalize();
+if (CurKind == Action::OFK_HIP && CurDep->getOffloadingArch()) {
+  Triples += '-';
+  Triples += CurDep->getOffloadingArch();
+}
   }
   CmdArgs.push_back(TCArgs.MakeArgString(Triples));
 
@@ -5611,6 +5615,11 @@ void OffloadBundler::ConstructJobMultipl
 Triples += Action::GetOffloadKindName(Dep.DependentOffloadKind);
 Triples += '-';
 Triples += Dep.DependentToolChain->getTriple().normalize();
+if (Dep.DependentOffloadKind == Action::OFK_HIP &&
+!Dep.DependentBoundArch.empty()) {
+  Triples += '-';
+  Triples += Dep.DependentBoundArch;
+}
   }
 
   CmdArgs.push_back(TCArgs.MakeArgString(Triples));

Modified: cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp?rev=332121&r1=332120&r2=332121&view=diff
==
--- cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp (original)
+++ cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp Fri May 11 
12:02:18 2018
@@ -969,11 +969,11 @@ int main(int argc, const char **argv) {
 getOffloadKindAndTriple(Target, Kind, Triple);
 
 bool KindIsValid = !Kind.empty();
-KindIsValid = KindIsValid &&
-  StringSwitch(Kind)
-  .Case("host", true)
-  .Case("openmp", true)
-  .Default(false);
+KindIsValid = KindIsValid && StringSwitch(Kind)
+ .Case("host", true)
+ .Case("openmp", true)
+ .Case("hip", true)
+ .Default(false);
 
 bool TripleIsValid = !Triple.empty();
 llvm::Triple T(Triple);


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r332122 - [HIP] Diagnose unsupported host triple

2018-05-11 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri May 11 12:14:34 2018
New Revision: 332122

URL: http://llvm.org/viewvc/llvm-project?rev=332122&view=rev
Log:
[HIP] Diagnose unsupported host triple

Differential Revision: https://reviews.llvm.org/D46487

Modified:
cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td
cfe/trunk/lib/Driver/Driver.cpp
cfe/trunk/test/Driver/cuda-bad-arch.cu

Modified: cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td?rev=332122&r1=332121&r2=332122&view=diff
==
--- cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td Fri May 11 12:14:34 
2018
@@ -40,7 +40,7 @@ def err_drv_cuda_version_unsupported : E
   "but installation at %3 is %4.  Use --cuda-path to specify a different CUDA "
   "install, pass a different GPU arch with --cuda-gpu-arch, or pass "
   "--no-cuda-version-check.">;
-def err_drv_cuda_nvptx_host : Error<"unsupported use of NVPTX for host 
compilation.">;
+def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host 
compilation.">;
 def err_drv_invalid_thread_model_for_target : Error<
   "invalid thread model '%0' in '%1' for this target">;
 def err_drv_invalid_linker_name : Error<

Modified: cfe/trunk/lib/Driver/Driver.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=332122&r1=332121&r2=332122&view=diff
==
--- cfe/trunk/lib/Driver/Driver.cpp (original)
+++ cfe/trunk/lib/Driver/Driver.cpp Fri May 11 12:14:34 2018
@@ -2338,11 +2338,13 @@ class OffloadingActionBuilder final {
 
   const ToolChain *HostTC = 
C.getSingleOffloadToolChain();
   assert(HostTC && "No toolchain for host compilation.");
-  if (HostTC->getTriple().isNVPTX()) {
-// We do not support targeting NVPTX for host compilation. Throw
+  if (HostTC->getTriple().isNVPTX() ||
+  HostTC->getTriple().getArch() == llvm::Triple::amdgcn) {
+// We do not support targeting NVPTX/AMDGCN for host compilation. Throw
 // an error and abort pipeline construction early so we don't trip
 // asserts that assume device-side compilation.
-C.getDriver().Diag(diag::err_drv_cuda_nvptx_host);
+C.getDriver().Diag(diag::err_drv_cuda_host_arch)
+<< HostTC->getTriple().getArchName();
 return true;
   }
 

Modified: cfe/trunk/test/Driver/cuda-bad-arch.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-bad-arch.cu?rev=332122&r1=332121&r2=332122&view=diff
==
--- cfe/trunk/test/Driver/cuda-bad-arch.cu (original)
+++ cfe/trunk/test/Driver/cuda-bad-arch.cu Fri May 11 12:14:34 2018
@@ -2,6 +2,7 @@
 // REQUIRES: clang-driver
 // REQUIRES: x86-registered-target
 // REQUIRES: nvptx-registered-target
+// REQUIRES: amdgpu-registered-target
 
 // RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=compute_20 -c %s 
2>&1 \
 // RUN: | FileCheck -check-prefix BAD %s
@@ -25,9 +26,12 @@
 // RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix OK %s
 
-// We don't allow using NVPTX for host compilation.
+// We don't allow using NVPTX/AMDGCN for host compilation.
 // RUN: %clang -### --cuda-host-only -target nvptx-nvidia-cuda -c %s 2>&1 \
 // RUN: | FileCheck -check-prefix HOST_NVPTX %s
+// RUN: %clang -### --cuda-host-only -target amdgcn-amd-amdhsa -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix HOST_AMDGCN %s
 
 // OK-NOT: error: Unsupported CUDA gpu architecture
-// HOST_NVPTX: error: unsupported use of NVPTX for host compilation.
+// HOST_NVPTX: error: unsupported architecture 'nvptx' for host compilation.
+// HOST_AMDGCN: error: unsupported architecture 'amdgcn' for host compilation.


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r332123 - [HIP] Set proper triple and offload kind for the toolchain

2018-05-11 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri May 11 12:21:39 2018
New Revision: 332123

URL: http://llvm.org/viewvc/llvm-project?rev=332123&view=rev
Log:
[HIP] Set proper triple and offload kind for the toolchain

Also introduce --hip-link option to indicate HIP for linking.

Differential Revision: https://reviews.llvm.org/D46475

Added:
cfe/trunk/test/Driver/Inputs/hip_multiple_inputs/
cfe/trunk/test/Driver/Inputs/hip_multiple_inputs/a.cu
cfe/trunk/test/Driver/Inputs/hip_multiple_inputs/b.hip
cfe/trunk/test/Driver/hip-inputs.hip
Modified:
cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td
cfe/trunk/include/clang/Driver/Options.td
cfe/trunk/include/clang/Driver/Types.h
cfe/trunk/lib/Driver/Driver.cpp
cfe/trunk/lib/Driver/Types.cpp

Modified: cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td?rev=332123&r1=332122&r2=332123&view=diff
==
--- cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td Fri May 11 12:21:39 
2018
@@ -41,6 +41,7 @@ def err_drv_cuda_version_unsupported : E
   "install, pass a different GPU arch with --cuda-gpu-arch, or pass "
   "--no-cuda-version-check.">;
 def err_drv_cuda_host_arch : Error<"unsupported architecture '%0' for host 
compilation.">;
+def err_drv_mix_cuda_hip : Error<"Mixed Cuda and HIP compilation is not 
supported.">;
 def err_drv_invalid_thread_model_for_target : Error<
   "invalid thread model '%0' in '%1' for this target">;
 def err_drv_invalid_linker_name : Error<

Modified: cfe/trunk/include/clang/Driver/Options.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=332123&r1=332122&r2=332123&view=diff
==
--- cfe/trunk/include/clang/Driver/Options.td (original)
+++ cfe/trunk/include/clang/Driver/Options.td Fri May 11 12:21:39 2018
@@ -557,6 +557,8 @@ def no_cuda_include_ptx_EQ : Joined<["--
   HelpText<"Do not include PTX for the follwing GPU architecture (e.g. sm_35) 
or 'all'. May be specified more than once.">;
 def cuda_gpu_arch_EQ : Joined<["--"], "cuda-gpu-arch=">, Flags<[DriverOption]>,
   HelpText<"CUDA GPU architecture (e.g. sm_35).  May be specified more than 
once.">;
+def hip_link : Flag<["--"], "hip-link">,
+  HelpText<"Link clang-offload-bundler bundles for HIP">;
 def no_cuda_gpu_arch_EQ : Joined<["--"], "no-cuda-gpu-arch=">, 
Flags<[DriverOption]>,
   HelpText<"Remove GPU architecture (e.g. sm_35) from the list of GPUs to 
compile for. "
"'all' resets the list to its default value.">;

Modified: cfe/trunk/include/clang/Driver/Types.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Types.h?rev=332123&r1=332122&r2=332123&view=diff
==
--- cfe/trunk/include/clang/Driver/Types.h (original)
+++ cfe/trunk/include/clang/Driver/Types.h Fri May 11 12:21:39 2018
@@ -77,6 +77,9 @@ namespace types {
   /// isCuda - Is this a CUDA input.
   bool isCuda(ID Id);
 
+  /// isHIP - Is this a HIP input.
+  bool isHIP(ID Id);
+
   /// isObjC - Is this an "ObjC" input (Obj-C and Obj-C++ sources and headers).
   bool isObjC(ID Id);
 

Modified: cfe/trunk/lib/Driver/Driver.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=332123&r1=332122&r2=332123&view=diff
==
--- cfe/trunk/lib/Driver/Driver.cpp (original)
+++ cfe/trunk/lib/Driver/Driver.cpp Fri May 11 12:21:39 2018
@@ -538,24 +538,46 @@ void Driver::CreateOffloadingDeviceToolC
   InputList &Inputs) {
 
   //
-  // CUDA
+  // CUDA/HIP
   //
-  // We need to generate a CUDA toolchain if any of the inputs has a CUDA type.
-  if (llvm::any_of(Inputs, [](std::pair &I) 
{
+  // We need to generate a CUDA toolchain if any of the inputs has a CUDA
+  // or HIP type. However, mixed CUDA/HIP compilation is not supported.
+  bool IsCuda =
+  llvm::any_of(Inputs, [](std::pair &I) 
{
 return types::isCuda(I.first);
-  })) {
+  });
+  bool IsHIP =
+  llvm::any_of(Inputs,
+   [](std::pair &I) {
+ return types::isHIP(I.first);
+   }) ||
+  C.getInputArgs().hasArg(options::OPT_hip_link);
+  if (IsCuda && IsHIP) {
+Diag(clang::diag::err_drv_mix_cuda_hip);
+return;
+  }
+  if (IsCuda || IsHIP) {
 const ToolChain *HostTC = C.getSingleOffloadToolChain();
 const llvm::Triple &HostTriple = HostTC->getTriple();
-llvm::Triple CudaTriple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda"
- : "nvptx-nvidia-cuda");
-// Use the CUDA and host triples as the key into t

r332279 - CodeGen: Emit string literal in constant address space

2018-05-14 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Mon May 14 12:20:12 2018
New Revision: 332279

URL: http://llvm.org/viewvc/llvm-project?rev=332279&view=rev
Log:
CodeGen: Emit string literal in constant address space

Some targets have a constant address space (e.g. amdgcn). For them, string
literals should be emitted in the constant address space and then cast to the
default address space.

Differential Revision: https://reviews.llvm.org/D46643

Added:
cfe/trunk/test/CodeGenCXX/amdgcn-string-literal.cpp
Modified:
cfe/trunk/lib/CodeGen/CGDecl.cpp
cfe/trunk/lib/CodeGen/CodeGenModule.cpp
cfe/trunk/lib/CodeGen/CodeGenModule.h

Modified: cfe/trunk/lib/CodeGen/CGDecl.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGDecl.cpp?rev=332279&r1=332278&r2=332279&view=diff
==
--- cfe/trunk/lib/CodeGen/CGDecl.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGDecl.cpp Mon May 14 12:20:12 2018
@@ -1374,7 +1374,7 @@ void CodeGenFunction::EmitAutoVarInit(co
 llvm::ConstantInt::get(IntPtrTy,

getContext().getTypeSizeInChars(type).getQuantity());
 
-  llvm::Type *BP = AllocaInt8PtrTy;
+  llvm::Type *BP = CGM.Int8Ty->getPointerTo(Loc.getAddressSpace());
   if (Loc.getType() != BP)
 Loc = Builder.CreateBitCast(Loc, BP);
 
@@ -1395,11 +1395,10 @@ void CodeGenFunction::EmitAutoVarInit(co
 // Otherwise, create a temporary global with the initializer then
 // memcpy from the global to the alloca.
 std::string Name = getStaticDeclName(CGM, D);
-unsigned AS = 0;
-if (getLangOpts().OpenCL) {
-  AS = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant);
-  BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS);
-}
+unsigned AS = CGM.getContext().getTargetAddressSpace(
+CGM.getStringLiteralAddressSpace());
+BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS);
+
 llvm::GlobalVariable *GV =
   new llvm::GlobalVariable(CGM.getModule(), constant->getType(), true,
llvm::GlobalValue::PrivateLinkage,

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=332279&r1=332278&r2=332279&view=diff
==
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Mon May 14 12:20:12 2018
@@ -3044,6 +3044,39 @@ LangAS CodeGenModule::GetGlobalVarAddres
   return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D);
 }
 
+LangAS CodeGenModule::getStringLiteralAddressSpace() const {
+  // OpenCL v1.2 s6.5.3: a string literal is in the constant address space.
+  if (LangOpts.OpenCL)
+return LangAS::opencl_constant;
+  if (auto AS = getTarget().getConstantAddressSpace())
+return AS.getValue();
+  return LangAS::Default;
+}
+
+// In address space agnostic languages, string literals are in default address
+// space in AST. However, certain targets (e.g. amdgcn) request them to be
+// emitted in constant address space in LLVM IR. To be consistent with other
+// parts of AST, string literal global variables in constant address space
+// need to be casted to default address space before being put into address
+// map and referenced by other part of CodeGen.
+// In OpenCL, string literals are in constant address space in AST, therefore
+// they should not be casted to default address space.
+static llvm::Constant *
+castStringLiteralToDefaultAddressSpace(CodeGenModule &CGM,
+   llvm::GlobalVariable *GV) {
+  llvm::Constant *Cast = GV;
+  if (!CGM.getLangOpts().OpenCL) {
+if (auto AS = CGM.getTarget().getConstantAddressSpace()) {
+  if (AS != LangAS::Default)
+Cast = CGM.getTargetCodeGenInfo().performAddrSpaceCast(
+CGM, GV, AS.getValue(), LangAS::Default,
+GV->getValueType()->getPointerTo(
+CGM.getContext().getTargetAddressSpace(LangAS::Default)));
+}
+  }
+  return Cast;
+}
+
 template
 void CodeGenModule::MaybeHandleStaticInExternC(const SomeDecl *D,
llvm::GlobalValue *GV) {
@@ -4039,10 +4072,8 @@ static llvm::GlobalVariable *
 GenerateStringLiteral(llvm::Constant *C, llvm::GlobalValue::LinkageTypes LT,
   CodeGenModule &CGM, StringRef GlobalName,
   CharUnits Alignment) {
-  // OpenCL v1.2 s6.5.3: a string literal is in the constant address space.
-  unsigned AddrSpace = 0;
-  if (CGM.getLangOpts().OpenCL)
-AddrSpace = 
CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant);
+  unsigned AddrSpace = CGM.getContext().getTargetAddressSpace(
+  CGM.getStringLiteralAddressSpace());
 
   llvm::Module &M = CGM.getModule();
   // Create a global variable for this string
@@ -4104,7 +4135,9 @@ CodeGenModule::GetAddrOfConstantStringFr
 
   SanitizerMD->reportGlobal

r332593 - CodeGen: Fix invalid bitcast for lifetime.start/end

2018-05-17 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu May 17 04:16:35 2018
New Revision: 332593

URL: http://llvm.org/viewvc/llvm-project?rev=332593&view=rev
Log:
CodeGen: Fix invalid bitcast for lifetime.start/end

lifetime.start/end expect a pointer argument in the alloca address space.
However, in C++ a temporary variable is in the default address space.

This patch changes the CreateMemTemp and CreateTempAlloca APIs to
provide the original alloca instruction, which is then passed to
lifetime.start/end.

It only affects targets with non-zero alloca address space.

Differential Revision: https://reviews.llvm.org/D45900

Added:
cfe/trunk/test/CodeGenCXX/amdgcn_declspec_get.cpp
Modified:
cfe/trunk/lib/CodeGen/CGCall.cpp
cfe/trunk/lib/CodeGen/CGDecl.cpp
cfe/trunk/lib/CodeGen/CGExpr.cpp
cfe/trunk/lib/CodeGen/CGExprAgg.cpp
cfe/trunk/lib/CodeGen/CodeGenFunction.h

Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=332593&r1=332592&r2=332593&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Thu May 17 04:16:35 2018
@@ -3812,16 +3812,17 @@ RValue CodeGenFunction::EmitCall(const C
   // If the call returns a temporary with struct return, create a temporary
   // alloca to hold the result, unless one is given to us.
   Address SRetPtr = Address::invalid();
+  Address SRetAlloca = Address::invalid();
   llvm::Value *UnusedReturnSizePtr = nullptr;
   if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
 if (!ReturnValue.isNull()) {
   SRetPtr = ReturnValue.getValue();
 } else {
-  SRetPtr = CreateMemTemp(RetTy);
+  SRetPtr = CreateMemTemp(RetTy, "tmp", &SRetAlloca);
   if (HaveInsertPoint() && ReturnValue.isUnused()) {
 uint64_t size =
 CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(RetTy));
-UnusedReturnSizePtr = EmitLifetimeStart(size, SRetPtr.getPointer());
+UnusedReturnSizePtr = EmitLifetimeStart(size, SRetAlloca.getPointer());
   }
 }
 if (IRFunctionArgs.hasSRetArg()) {
@@ -3888,7 +3889,8 @@ RValue CodeGenFunction::EmitCall(const C
   if (!I->isAggregate()) {
 // Make a temporary alloca to pass the argument.
 Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
- "indirect-arg-temp", false);
+ "indirect-arg-temp", /*Alloca=*/nullptr,
+ /*Cast=*/false);
 IRCallArgs[FirstIRArg] = Addr.getPointer();
 
 I->copyInto(*this, Addr);
@@ -3934,7 +3936,8 @@ RValue CodeGenFunction::EmitCall(const C
 if (NeedCopy) {
   // Create an aligned temporary, and copy to it.
   Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
- "byval-temp", false);
+ "byval-temp", /*Alloca=*/nullptr,
+ /*Cast=*/false);
   IRCallArgs[FirstIRArg] = AI.getPointer();
   I->copyInto(*this, AI);
 } else {
@@ -4062,6 +4065,7 @@ RValue CodeGenFunction::EmitCall(const C
 
   llvm::Value *tempSize = nullptr;
   Address addr = Address::invalid();
+  Address AllocaAddr = Address::invalid();
   if (I->isAggregate()) {
 addr = I->hasLValue() ? I->getKnownLValue().getAddress()
   : I->getKnownRValue().getAggregateAddress();
@@ -4076,9 +4080,11 @@ RValue CodeGenFunction::EmitCall(const C
 
 // Materialize to a temporary.
 addr = CreateTempAlloca(RV.getScalarVal()->getType(),
- CharUnits::fromQuantity(std::max(layout->getAlignment(),
-  scalarAlign)));
-tempSize = EmitLifetimeStart(scalarSize, addr.getPointer());
+CharUnits::fromQuantity(std::max(
+layout->getAlignment(), scalarAlign)),
+"tmp",
+/*ArraySize=*/nullptr, &AllocaAddr);
+tempSize = EmitLifetimeStart(scalarSize, AllocaAddr.getPointer());
 
 Builder.CreateStore(RV.getScalarVal(), addr);
   }
@@ -4096,7 +4102,7 @@ RValue CodeGenFunction::EmitCall(const C
   assert(IRArgPos == FirstIRArg + NumIRArgs);
 
   if (tempSize) {
-EmitLifetimeEnd(tempSize, addr.getPointer());
+EmitLifetimeEnd(tempSize, AllocaAddr.getPointer());
   }
 
   break;
@@ -4258,7 +4264,7 @@ RValue CodeGenFunction::EmitCall(const C
   // pop this cleanup later on. Being eager about this is OK, since this
   // temporary is 'invisible' outside of the callee.
   if (UnusedReturnSizePtr)
-pushFullExprCleanup(NormalEHLifetimeMarker, SRetPtr,
+pushFullExprCleanup(NormalEHLifetimeMarker, SRetAlloca,
  Unuse

r332724 - [HIP] Support offloading by linker script

2018-05-18 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri May 18 08:07:56 2018
New Revision: 332724

URL: http://llvm.org/viewvc/llvm-project?rev=332724&view=rev
Log:
[HIP] Support offloading by linker script

To support linking device code in different source files, it is necessary to
embed the fat binary at the host linking stage.

This patch emits an external symbol for the fat binary in host codegen, then
has lld embed the fat binary through a linker script.

Differential Revision: https://reviews.llvm.org/D46472

Modified:
cfe/trunk/include/clang/Driver/Options.td
cfe/trunk/lib/CodeGen/CGCUDANV.cpp
cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp
cfe/trunk/lib/Driver/ToolChains/CommonArgs.h
cfe/trunk/lib/Driver/ToolChains/Gnu.cpp
cfe/trunk/test/CodeGenCUDA/device-stub.cu

Modified: cfe/trunk/include/clang/Driver/Options.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=332724&r1=332723&r2=332724&view=diff
==
--- cfe/trunk/include/clang/Driver/Options.td (original)
+++ cfe/trunk/include/clang/Driver/Options.td Fri May 18 08:07:56 2018
@@ -586,6 +586,8 @@ def fno_cuda_rdc : Flag<["-"], "fno-cuda
 def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>,
   HelpText<"Use 32-bit pointers for accessing const/local/shared address 
spaces.">;
 def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">;
+def fhip_dump_offload_linker_script : Flag<["-"], 
"fhip-dump-offload-linker-script">,
+  Group, Flags<[NoArgumentUnused, HelpHidden]>;
 def dA : Flag<["-"], "dA">, Group;
 def dD : Flag<["-"], "dD">, Group, Flags<[CC1Option]>,
   HelpText<"Print macro definitions in -E mode in addition to normal output">;

Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=332724&r1=332723&r2=332724&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Fri May 18 08:07:56 2018
@@ -27,6 +27,8 @@ using namespace clang;
 using namespace CodeGen;
 
 namespace {
+constexpr unsigned CudaFatMagic = 0x466243b1;
+constexpr unsigned HIPFatMagic = 0x48495046; // "HIPF"
 
 class CGNVCUDARuntime : public CGCUDARuntime {
 
@@ -310,19 +312,20 @@ llvm::Function *CGNVCUDARuntime::makeReg
 /// }
 /// \endcode
 llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
+  bool IsHIP = CGM.getLangOpts().HIP;
   // No need to generate ctors/dtors if there is no GPU binary.
-  std::string GpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName;
-  if (GpuBinaryFileName.empty())
+  StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName;
+  if (CudaGpuBinaryFileName.empty() && !IsHIP)
 return nullptr;
 
-  // void __cuda_register_globals(void* handle);
+  // void __{cuda|hip}_register_globals(void* handle);
   llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
   // We always need a function to pass in as callback. Create a dummy
   // implementation if we don't need to register anything.
   if (RelocatableDeviceCode && !RegisterGlobalsFunc)
 RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy());
 
-  // void ** __cudaRegisterFatBinary(void *);
+  // void ** __{cuda|hip}RegisterFatBinary(void *);
   llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction(
   llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
   addUnderscoredPrefixToName("RegisterFatBinary"));
@@ -334,12 +337,16 @@ llvm::Function *CGNVCUDARuntime::makeMod
   // global variable and save a reference in GpuBinaryHandle to be cleaned up
   // in destructor on exit. Then associate all known kernels with the GPU 
binary
   // handle so CUDA runtime can figure out what to call on the GPU side.
-  llvm::ErrorOr> GpuBinaryOrErr =
-  llvm::MemoryBuffer::getFileOrSTDIN(GpuBinaryFileName);
-  if (std::error_code EC = GpuBinaryOrErr.getError()) {
-CGM.getDiags().Report(diag::err_cannot_open_file)
-<< GpuBinaryFileName << EC.message();
-return nullptr;
+  std::unique_ptr CudaGpuBinary;
+  if (!IsHIP) {
+llvm::ErrorOr> CudaGpuBinaryOrErr =
+llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName);
+if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {
+  CGM.getDiags().Report(diag::err_cannot_open_file)
+  << CudaGpuBinaryFileName << EC.message();
+  return nullptr;
+}
+CudaGpuBinary = std::move(CudaGpuBinaryOrErr.get());
   }
 
   llvm::Function *ModuleCtorFunc = llvm::Function::Create(
@@ -353,28 +360,60 @@ llvm::Function *CGNVCUDARuntime::makeMod
   CtorBuilder.SetInsertPoint(CtorEntryBB);
 
   const char *FatbinConstantName;
-  if (RelocatableDeviceCode)
+  const char *FatbinSectionName;
+  const char *ModuleIDSectionName;
+  StringRef ModuleIDPrefix;
+  llvm::Constant *FatBinStr;
+  unsigned FatMagic;
+  if (IsHIP) {
+FatbinConstantName = "

r332982 - Call CreateTempMemWithoutCast for ActiveFlag

2018-05-22 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue May 22 07:36:26 2018
New Revision: 332982

URL: http://llvm.org/viewvc/llvm-project?rev=332982&view=rev
Log:
Call CreateTempMemWithoutCast for ActiveFlag

Introduced CreateMemTempWithoutCast and CreateTempAllocaWithoutCast to
emit an alloca
without casting it to the default address space.

ActiveFlag is a temporary variable emitted for cleanup. It is defined with
type AllocaInst*, and there is
a cast to AllocaInst in SetActiveFlag. An alloca cast to a generic pointer
causes an assertion failure in
SetActiveFlag.

Since ActiveFlag is only loaded and stored, it is safe to use the original
alloca; therefore
CreateMemTempWithoutCast is called.

Differential Revision: https://reviews.llvm.org/D47099

Modified:
cfe/trunk/lib/CodeGen/CGCall.cpp
cfe/trunk/lib/CodeGen/CGCleanup.cpp
cfe/trunk/lib/CodeGen/CGExpr.cpp
cfe/trunk/lib/CodeGen/CodeGenFunction.h
cfe/trunk/test/CodeGenCXX/conditional-temporaries.cpp

Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=332982&r1=332981&r2=332982&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Tue May 22 07:36:26 2018
@@ -3888,9 +3888,8 @@ RValue CodeGenFunction::EmitCall(const C
   assert(NumIRArgs == 1);
   if (!I->isAggregate()) {
 // Make a temporary alloca to pass the argument.
-Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
- "indirect-arg-temp", /*Alloca=*/nullptr,
- /*Cast=*/false);
+Address Addr = CreateMemTempWithoutCast(
+I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp");
 IRCallArgs[FirstIRArg] = Addr.getPointer();
 
 I->copyInto(*this, Addr);
@@ -3935,9 +3934,8 @@ RValue CodeGenFunction::EmitCall(const C
 }
 if (NeedCopy) {
   // Create an aligned temporary, and copy to it.
-  Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
- "byval-temp", /*Alloca=*/nullptr,
- /*Cast=*/false);
+  Address AI = CreateMemTempWithoutCast(
+  I->Ty, ArgInfo.getIndirectAlign(), "byval-temp");
   IRCallArgs[FirstIRArg] = AI.getPointer();
   I->copyInto(*this, AI);
 } else {

Modified: cfe/trunk/lib/CodeGen/CGCleanup.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCleanup.cpp?rev=332982&r1=332981&r2=332982&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCleanup.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCleanup.cpp Tue May 22 07:36:26 2018
@@ -283,8 +283,8 @@ void EHScopeStack::popNullFixups() {
 
 void CodeGenFunction::initFullExprCleanup() {
   // Create a variable to decide whether the cleanup needs to be run.
-  Address active = CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(),
-"cleanup.cond");
+  Address active = CreateTempAllocaWithoutCast(
+  Builder.getInt1Ty(), CharUnits::One(), "cleanup.cond");
 
   // Initialize it to false at a site that's guaranteed to be run
   // before each evaluation.

Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=332982&r1=332981&r2=332982&view=diff
==
--- cfe/trunk/lib/CodeGen/CGExpr.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExpr.cpp Tue May 22 07:36:26 2018
@@ -61,21 +61,30 @@ llvm::Value *CodeGenFunction::EmitCastTo
 
 /// CreateTempAlloca - This creates a alloca and inserts it into the entry
 /// block.
+Address CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty,
+ CharUnits Align,
+ const Twine &Name,
+ llvm::Value *ArraySize) {
+  auto Alloca = CreateTempAlloca(Ty, Name, ArraySize);
+  Alloca->setAlignment(Align.getQuantity());
+  return Address(Alloca, Align);
+}
+
+/// CreateTempAlloca - This creates a alloca and inserts it into the entry
+/// block. The alloca is casted to default address space if necessary.
 Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
   const Twine &Name,
   llvm::Value *ArraySize,
-  Address *AllocaAddr,
-  bool CastToDefaultAddrSpace) {
-  auto Alloca = CreateTempAlloca(Ty, Name, ArraySize);
-  Alloca->setAlignment(Align.getQuantity());
+  Address *AllocaAddr) {
+  auto Alloca = CreateTempAllocaWithoutCast(Ty, Al

r332991 - Revert r332982 Call CreateTempMemWithoutCast for ActiveFlag

2018-05-22 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue May 22 09:13:07 2018
New Revision: 332991

URL: http://llvm.org/viewvc/llvm-project?rev=332991&view=rev
Log:
Revert r332982 Call CreateTempMemWithoutCast for ActiveFlag

Due to regression on arm.

Modified:
cfe/trunk/lib/CodeGen/CGCall.cpp
cfe/trunk/lib/CodeGen/CGCleanup.cpp
cfe/trunk/lib/CodeGen/CGExpr.cpp
cfe/trunk/lib/CodeGen/CodeGenFunction.h
cfe/trunk/test/CodeGenCXX/conditional-temporaries.cpp

Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=332991&r1=332990&r2=332991&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Tue May 22 09:13:07 2018
@@ -3888,8 +3888,9 @@ RValue CodeGenFunction::EmitCall(const C
   assert(NumIRArgs == 1);
   if (!I->isAggregate()) {
 // Make a temporary alloca to pass the argument.
-Address Addr = CreateMemTempWithoutCast(
-I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp");
+Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
+ "indirect-arg-temp", /*Alloca=*/nullptr,
+ /*Cast=*/false);
 IRCallArgs[FirstIRArg] = Addr.getPointer();
 
 I->copyInto(*this, Addr);
@@ -3934,8 +3935,9 @@ RValue CodeGenFunction::EmitCall(const C
 }
 if (NeedCopy) {
   // Create an aligned temporary, and copy to it.
-  Address AI = CreateMemTempWithoutCast(
-  I->Ty, ArgInfo.getIndirectAlign(), "byval-temp");
+  Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
+ "byval-temp", /*Alloca=*/nullptr,
+ /*Cast=*/false);
   IRCallArgs[FirstIRArg] = AI.getPointer();
   I->copyInto(*this, AI);
 } else {

Modified: cfe/trunk/lib/CodeGen/CGCleanup.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCleanup.cpp?rev=332991&r1=332990&r2=332991&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCleanup.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCleanup.cpp Tue May 22 09:13:07 2018
@@ -283,8 +283,8 @@ void EHScopeStack::popNullFixups() {
 
 void CodeGenFunction::initFullExprCleanup() {
   // Create a variable to decide whether the cleanup needs to be run.
-  Address active = CreateTempAllocaWithoutCast(
-  Builder.getInt1Ty(), CharUnits::One(), "cleanup.cond");
+  Address active = CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(),
+"cleanup.cond");
 
   // Initialize it to false at a site that's guaranteed to be run
   // before each evaluation.

Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=332991&r1=332990&r2=332991&view=diff
==
--- cfe/trunk/lib/CodeGen/CGExpr.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExpr.cpp Tue May 22 09:13:07 2018
@@ -61,30 +61,21 @@ llvm::Value *CodeGenFunction::EmitCastTo
 
 /// CreateTempAlloca - This creates a alloca and inserts it into the entry
 /// block.
-Address CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty,
- CharUnits Align,
- const Twine &Name,
- llvm::Value *ArraySize) {
-  auto Alloca = CreateTempAlloca(Ty, Name, ArraySize);
-  Alloca->setAlignment(Align.getQuantity());
-  return Address(Alloca, Align);
-}
-
-/// CreateTempAlloca - This creates a alloca and inserts it into the entry
-/// block. The alloca is casted to default address space if necessary.
 Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
   const Twine &Name,
   llvm::Value *ArraySize,
-  Address *AllocaAddr) {
-  auto Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
+  Address *AllocaAddr,
+  bool CastToDefaultAddrSpace) {
+  auto Alloca = CreateTempAlloca(Ty, Name, ArraySize);
+  Alloca->setAlignment(Align.getQuantity());
   if (AllocaAddr)
-*AllocaAddr = Alloca;
-  llvm::Value *V = Alloca.getPointer();
+*AllocaAddr = Address(Alloca, Align);
+  llvm::Value *V = Alloca;
   // Alloca always returns a pointer in alloca address space, which may
   // be different from the type defined by the language. For example,
   // in C++ the auto variables are in the default address space. Therefore
   // cast alloca to the default address space when necessary.
-  if (getASTAllocaAddressSpace()

r333483 - Add action builder for HIP

2018-05-29 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue May 29 17:49:10 2018
New Revision: 333483

URL: http://llvm.org/viewvc/llvm-project?rev=333483&view=rev
Log:
Add action builder for HIP

To support separate compilation/linking and linking of device IR across different
source files,
a new HIP action builder is introduced. Basically, it compiles/links host and
device
code separately, and embeds the fat binary at the host linking stage through a
linker script.

Differential Revision: https://reviews.llvm.org/D46476

Modified:
cfe/trunk/lib/Driver/Driver.cpp
cfe/trunk/test/Driver/cuda-phases.cu

Modified: cfe/trunk/lib/Driver/Driver.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=333483&r1=333482&r2=333483&view=diff
==
--- cfe/trunk/lib/Driver/Driver.cpp (original)
+++ cfe/trunk/lib/Driver/Driver.cpp Tue May 29 17:49:10 2018
@@ -2151,9 +2151,10 @@ class OffloadingActionBuilder final {
 }
   };
 
-  /// CUDA action builder. It injects device code in the host backend
-  /// action.
-  class CudaActionBuilder final : public DeviceActionBuilder {
+  /// Base class for CUDA/HIP action builder. It injects device code in
+  /// the host backend action.
+  class CudaActionBuilderBase : public DeviceActionBuilder {
+  protected:
 /// Flags to signal if the user requested host-only or device-only
 /// compilation.
 bool CompileHostOnly = false;
@@ -2170,115 +2171,11 @@ class OffloadingActionBuilder final {
 
 /// Flag that is set to true if this builder acted on the current input.
 bool IsActive = false;
-
   public:
-CudaActionBuilder(Compilation &C, DerivedArgList &Args,
-  const Driver::InputList &Inputs)
-: DeviceActionBuilder(C, Args, Inputs, Action::OFK_Cuda) {}
-
-ActionBuilderReturnCode
-getDeviceDependences(OffloadAction::DeviceDependences &DA,
- phases::ID CurPhase, phases::ID FinalPhase,
- PhasesTy &Phases) override {
-  if (!IsActive)
-return ABRT_Inactive;
-
-  // If we don't have more CUDA actions, we don't have any dependences to
-  // create for the host.
-  if (CudaDeviceActions.empty())
-return ABRT_Success;
-
-  assert(CudaDeviceActions.size() == GpuArchList.size() &&
- "Expecting one action per GPU architecture.");
-  assert(!CompileHostOnly &&
- "Not expecting CUDA actions in host-only compilation.");
-
-  // If we are generating code for the device or we are in a backend phase,
-  // we attempt to generate the fat binary. We compile each arch to ptx and
-  // assemble to cubin, then feed the cubin *and* the ptx into a device
-  // "link" action, which uses fatbinary to combine these cubins into one
-  // fatbin.  The fatbin is then an input to the host action if not in
-  // device-only mode.
-  if (CompileDeviceOnly || CurPhase == phases::Backend) {
-ActionList DeviceActions;
-for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
-  // Produce the device action from the current phase up to the 
assemble
-  // phase.
-  for (auto Ph : Phases) {
-// Skip the phases that were already dealt with.
-if (Ph < CurPhase)
-  continue;
-// We have to be consistent with the host final phase.
-if (Ph > FinalPhase)
-  break;
-
-CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction(
-C, Args, Ph, CudaDeviceActions[I], Action::OFK_Cuda);
-
-if (Ph == phases::Assemble)
-  break;
-  }
-
-  // If we didn't reach the assemble phase, we can't generate the fat
-  // binary. We don't need to generate the fat binary if we are not in
-  // device-only mode.
-  if (!isa(CudaDeviceActions[I]) ||
-  CompileDeviceOnly)
-continue;
-
-  Action *AssembleAction = CudaDeviceActions[I];
-  assert(AssembleAction->getType() == types::TY_Object);
-  assert(AssembleAction->getInputs().size() == 1);
-
-  Action *BackendAction = AssembleAction->getInputs()[0];
-  assert(BackendAction->getType() == types::TY_PP_Asm);
-
-  for (auto &A : {AssembleAction, BackendAction}) {
-OffloadAction::DeviceDependences DDep;
-DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]),
- Action::OFK_Cuda);
-DeviceActions.push_back(
-C.MakeAction(DDep, A->getType()));
-  }
-}
-
-// We generate the fat binary if we have device input actions.
-if (!DeviceActions.empty()) {
-  CudaFatBinary =
-  C.MakeAction(DeviceActions, 
types::TY_CUDA_FATBIN);
-
-  if (!CompileDeviceOnly) {
-DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
-

r333484 - Add HIP toolchain

2018-05-29 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue May 29 17:53:50 2018
New Revision: 333484

URL: http://llvm.org/viewvc/llvm-project?rev=333484&view=rev
Log:
Add HIP toolchain

This patch adds HIP toolchain to support HIP language mode. It includes:

Create specific compiler jobs for HIP.

Choose specific libraries for HIP.

With contribution from Greg Rodgers.

Differential Revision: https://reviews.llvm.org/D45212

Added:
cfe/trunk/lib/Driver/ToolChains/HIP.cpp
cfe/trunk/lib/Driver/ToolChains/HIP.h
cfe/trunk/test/Driver/Inputs/hip_multiple_inputs/lib1/
cfe/trunk/test/Driver/Inputs/hip_multiple_inputs/lib1/lib1.bc
cfe/trunk/test/Driver/Inputs/hip_multiple_inputs/lib2/
cfe/trunk/test/Driver/Inputs/hip_multiple_inputs/lib2/lib2.bc
cfe/trunk/test/Driver/hip-toolchain.hip
Modified:
cfe/trunk/include/clang/Driver/Options.td
cfe/trunk/lib/Driver/CMakeLists.txt
cfe/trunk/lib/Driver/Driver.cpp

Modified: cfe/trunk/include/clang/Driver/Options.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=333484&r1=333483&r2=333484&view=diff
==
--- cfe/trunk/include/clang/Driver/Options.td (original)
+++ cfe/trunk/include/clang/Driver/Options.td Tue May 29 17:53:50 2018
@@ -588,6 +588,10 @@ def fno_cuda_rdc : Flag<["-"], "fno-cuda
 def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>,
   HelpText<"Use 32-bit pointers for accessing const/local/shared address 
spaces.">;
 def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">;
+def hip_device_lib_path_EQ : Joined<["--"], "hip-device-lib-path=">, 
Group,
+  HelpText<"HIP device library path">;
+def hip_device_lib_EQ : Joined<["--"], "hip-device-lib=">, Group,
+  HelpText<"HIP device library">;
 def fhip_dump_offload_linker_script : Flag<["-"], 
"fhip-dump-offload-linker-script">,
   Group, Flags<[NoArgumentUnused, HelpHidden]>;
 def dA : Flag<["-"], "dA">, Group;

Modified: cfe/trunk/lib/Driver/CMakeLists.txt
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/CMakeLists.txt?rev=333484&r1=333483&r2=333484&view=diff
==
--- cfe/trunk/lib/Driver/CMakeLists.txt (original)
+++ cfe/trunk/lib/Driver/CMakeLists.txt Tue May 29 17:53:50 2018
@@ -45,6 +45,7 @@ add_clang_library(clangDriver
   ToolChains/Fuchsia.cpp
   ToolChains/Gnu.cpp
   ToolChains/Haiku.cpp
+  ToolChains/HIP.cpp
   ToolChains/Hexagon.cpp
   ToolChains/Linux.cpp
   ToolChains/MipsLinux.cpp

Modified: cfe/trunk/lib/Driver/Driver.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=333484&r1=333483&r2=333484&view=diff
==
--- cfe/trunk/lib/Driver/Driver.cpp (original)
+++ cfe/trunk/lib/Driver/Driver.cpp Tue May 29 17:53:50 2018
@@ -12,6 +12,7 @@
 #include "ToolChains/AMDGPU.h"
 #include "ToolChains/AVR.h"
 #include "ToolChains/Ananas.h"
+#include "ToolChains/BareMetal.h"
 #include "ToolChains/Clang.h"
 #include "ToolChains/CloudABI.h"
 #include "ToolChains/Contiki.h"
@@ -22,15 +23,15 @@
 #include "ToolChains/FreeBSD.h"
 #include "ToolChains/Fuchsia.h"
 #include "ToolChains/Gnu.h"
-#include "ToolChains/BareMetal.h"
+#include "ToolChains/HIP.h"
 #include "ToolChains/Haiku.h"
 #include "ToolChains/Hexagon.h"
 #include "ToolChains/Lanai.h"
 #include "ToolChains/Linux.h"
+#include "ToolChains/MSVC.h"
 #include "ToolChains/MinGW.h"
 #include "ToolChains/Minix.h"
 #include "ToolChains/MipsLinux.h"
-#include "ToolChains/MSVC.h"
 #include "ToolChains/Myriad.h"
 #include "ToolChains/NaCl.h"
 #include "ToolChains/NetBSD.h"
@@ -70,9 +71,9 @@
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Program.h"
+#include "llvm/Support/StringSaver.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/StringSaver.h"
 #include 
 #include 
 #include 
@@ -540,7 +541,7 @@ void Driver::CreateOffloadingDeviceToolC
   //
   // CUDA/HIP
   //
-  // We need to generate a CUDA toolchain if any of the inputs has a CUDA
+  // We need to generate a CUDA/HIP toolchain if any of the inputs has a CUDA
   // or HIP type. However, mixed CUDA/HIP compilation is not supported.
   bool IsCuda =
   llvm::any_of(Inputs, [](std::pair &I) 
{
@@ -556,21 +557,15 @@ void Driver::CreateOffloadingDeviceToolC
 Diag(clang::diag::err_drv_mix_cuda_hip);
 return;
   }
-  if (IsCuda || IsHIP) {
+  if (IsCuda) {
 const ToolChain *HostTC = C.getSingleOffloadToolChain();
 const llvm::Triple &HostTriple = HostTC->getTriple();
 StringRef DeviceTripleStr;
-auto OFK = IsHIP ? Action::OFK_HIP : Action::OFK_Cuda;
-if (IsHIP) {
-  // HIP is only supported on amdgcn.
-  DeviceTripleStr = "amdgcn-amd-amdhsa";
-} else {
-  // CUDA is only supported on nvptx.
-  DeviceTripleStr = HostTriple.isArch64

r314452 - [AMDGPU] Allow flexible register names in inline asm constraints

2017-09-28 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu Sep 28 12:07:59 2017
New Revision: 314452

URL: http://llvm.org/viewvc/llvm-project?rev=314452&view=rev
Log:
[AMDGPU] Allow flexible register names in inline asm constraints

Currently, AMDGPU inline asm only allows v and s as register names in constraints.

This patch allows the following register names in constraints (n and m are
unsigned integers, n < m):

v

s

{vn} or {v[n]}

{sn} or {s[n]}

{S} , where S is a special register name

{v[n:m]}

{s[n:m]}

Differential Revision: https://reviews.llvm.org/D37568

Modified:
cfe/trunk/lib/Basic/Targets/AMDGPU.h
cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.h?rev=314452&r1=314451&r2=314452&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.h (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.h Thu Sep 28 12:07:59 2017
@@ -17,6 +17,7 @@
 #include "clang/AST/Type.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TargetOptions.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Support/Compiler.h"
 
@@ -115,17 +116,83 @@ public:
 return None;
   }
 
+  /// Accepted register names: (n, m is unsigned integer, n < m)
+  /// v
+  /// s
+  /// {vn}, {v[n]}
+  /// {sn}, {s[n]}
+  /// {S} , where S is a special register name
+  /// {v[n:m]}
+  /// {s[n:m]}
   bool validateAsmConstraint(const char *&Name,
  TargetInfo::ConstraintInfo &Info) const override {
-switch (*Name) {
-default:
-  break;
-case 'v': // vgpr
-case 's': // sgpr
+static const ::llvm::StringSet<> SpecialRegs({
+"exec", "vcc", "flat_scratch", "m0", "scc", "tba", "tma",
+"flat_scratch_lo", "flat_scratch_hi", "vcc_lo", "vcc_hi", "exec_lo",
+"exec_hi", "tma_lo", "tma_hi", "tba_lo", "tba_hi",
+});
+
+StringRef S(Name);
+bool HasLeftParen = false;
+if (S.front() == '{') {
+  HasLeftParen = true;
+  S = S.drop_front();
+}
+if (S.empty())
+  return false;
+if (S.front() != 'v' && S.front() != 's') {
+  if (!HasLeftParen)
+return false;
+  auto E = S.find('}');
+  if (!SpecialRegs.count(S.substr(0, E)))
+return false;
+  S = S.drop_front(E + 1);
+  if (!S.empty())
+return false;
+  // Found {S} where S is a special register.
+  Info.setAllowsRegister();
+  Name = S.data() - 1;
+  return true;
+}
+S = S.drop_front();
+if (!HasLeftParen) {
+  if (!S.empty())
+return false;
+  // Found s or v.
   Info.setAllowsRegister();
+  Name = S.data() - 1;
   return true;
 }
-return false;
+bool HasLeftBracket = false;
+if (!S.empty() && S.front() == '[') {
+  HasLeftBracket = true;
+  S = S.drop_front();
+}
+unsigned long long N;
+if (S.empty() || consumeUnsignedInteger(S, 10, N))
+  return false;
+if (!S.empty() && S.front() == ':') {
+  if (!HasLeftBracket)
+return false;
+  S = S.drop_front();
+  unsigned long long M;
+  if (consumeUnsignedInteger(S, 10, M) || N >= M)
+return false;
+}
+if (HasLeftBracket) {
+  if (S.empty() || S.front() != ']')
+return false;
+  S = S.drop_front();
+}
+if (S.empty() || S.front() != '}')
+  return false;
+S = S.drop_front();
+if (!S.empty())
+  return false;
+// Found {vn}, {sn}, {v[n]}, {s[n]}, {v[n:m]}, or {s[n:m]}.
+Info.setAllowsRegister();
+Name = S.data() - 1;
+return true;
   }
 
   bool

Modified: cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl?rev=314452&r1=314451&r2=314452&view=diff
==
--- cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl (original)
+++ cfe/trunk/test/Sema/inline-asm-validate-amdgpu.cl Thu Sep 28 12:07:59 2017
@@ -1,6 +1,7 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -x cl -triple amdgcn -fsyntax-only  %s
-// expected-no-diagnostics
+// RUN: %clang_cc1 -triple amdgcn -fsyntax-only -verify %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 kernel void test () {
 
@@ -9,6 +10,67 @@ kernel void test () {
   // sgpr constraints
   __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "s" (imm) : );
 
+  __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}" (imm) : );
+  __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exe" (imm) : ); // 
expected-error {{invalid input constraint '{exe' in asm}}
+  __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec" (imm) : ); // 
expected-error {{invalid input constraint '{exec' in asm}}
+  __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}a" (imm) : ); // 
expected-error {{invalid input constraint '{exec}a' in asm}}
+

r314802 - [OpenCL] Fix checking of vector type casting

2017-10-03 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Oct  3 07:34:29 2017
New Revision: 314802

URL: http://llvm.org/viewvc/llvm-project?rev=314802&view=rev
Log:
[OpenCL] Fix checking of vector type casting

Currently clang allows the following code

int a;
int b = (const int) a;
However, it does not allow the following code

int4 a;
int4 b = (const int4) a;
This is because Clang compares the qualified types instead of unqualified types 
for vector type casting, which causes the inconsistency.

This patch fixes that.

Differential Revision: https://reviews.llvm.org/D38463

Modified:
cfe/trunk/lib/Sema/SemaExpr.cpp
cfe/trunk/test/SemaOpenCL/vector_conv_invalid.cl

Modified: cfe/trunk/lib/Sema/SemaExpr.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=314802&r1=314801&r2=314802&view=diff
==
--- cfe/trunk/lib/Sema/SemaExpr.cpp (original)
+++ cfe/trunk/lib/Sema/SemaExpr.cpp Tue Oct  3 07:34:29 2017
@@ -6033,9 +6033,9 @@ ExprResult Sema::CheckExtVectorCast(Sour
   // In OpenCL, casts between vectors of different types are not allowed.
   // (See OpenCL 6.2).
   if (SrcTy->isVectorType()) {
-if (!areLaxCompatibleVectorTypes(SrcTy, DestTy)
-|| (getLangOpts().OpenCL &&
-(DestTy.getCanonicalType() != SrcTy.getCanonicalType( {
+if (!areLaxCompatibleVectorTypes(SrcTy, DestTy) ||
+(getLangOpts().OpenCL &&
+ !Context.hasSameUnqualifiedType(DestTy, SrcTy))) {
   Diag(R.getBegin(),diag::err_invalid_conversion_between_ext_vectors)
 << DestTy << SrcTy << R;
   return ExprError();

Modified: cfe/trunk/test/SemaOpenCL/vector_conv_invalid.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/vector_conv_invalid.cl?rev=314802&r1=314801&r2=314802&view=diff
==
--- cfe/trunk/test/SemaOpenCL/vector_conv_invalid.cl (original)
+++ cfe/trunk/test/SemaOpenCL/vector_conv_invalid.cl Tue Oct  3 07:34:29 2017
@@ -5,10 +5,18 @@ typedef int int4 __attribute((ext_vector
 typedef int int3 __attribute((ext_vector_type(3)));
 typedef unsigned uint3 __attribute((ext_vector_type(3)));
 
-void vector_conv_invalid() {
+void vector_conv_invalid(const global int4 *const_global_ptr) {
   uint4 u = (uint4)(1);
   int4 i = u; // expected-error{{initializing 'int4' (vector of 4 'int' 
values) with an expression of incompatible type 'uint4' (vector of 4 'unsigned 
int' values)}}
   int4 e = (int4)u; // expected-error{{invalid conversion between ext-vector 
type 'int4' (vector of 4 'int' values) and 'uint4' (vector of 4 'unsigned int' 
values)}}
 
   uint3 u4 = (uint3)u; // expected-error{{invalid conversion between 
ext-vector type 'uint3' (vector of 3 'unsigned int' values) and 'uint4' (vector 
of 4 'unsigned int' values)}}
+
+  e = (const int4)i;
+  e = (constant int4)i;
+  e = (private int4)i;
+
+  private int4 *private_ptr = (const private int4 *)const_global_ptr; // 
expected-error{{casting 'const __global int4 *' to type 'const int4 *' changes 
address space of pointer}}
+  global int4 *global_ptr = const_global_ptr; // 
expected-warning {{initializing '__global int4 *' with an expression of type 
'const __global int4 *' discards qualifiers}}
+  global_ptr = (global int4 *)const_global_ptr;
 }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r314932 - [OpenCL] Clean up and add missing fields for block struct

2017-10-04 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Oct  4 13:32:17 2017
New Revision: 314932

URL: http://llvm.org/viewvc/llvm-project?rev=314932&view=rev
Log:
[OpenCL] Clean up and add missing fields for block struct

Currently block is translated to a structure equivalent to

struct Block {
  void *isa;
  int flags;
  int reserved;
  void *invoke;
  void *descriptor;
};
Except invoke, which is the pointer to the block invoke function,
all other fields are useless for OpenCL, which clutter the IR and
also waste memory since the block struct is passed to the block
invoke function as argument.

On the other hand, the size and alignment of the block struct are
not stored in the struct, which makes it difficult to implement
__enqueue_kernel as a library function, since the library function
needs to know the size and alignment of the argument which needs
to be passed to the kernel.

This patch removes the useless fields from the block struct and adds
size and align fields. The equivalent block struct will become

struct Block {
  int size;
  int align;
  generic void *invoke;
 /* custom fields */
};
It also changes the pointer to the invoke function to be
a generic pointer since the address space of a function
may not be private on certain targets.

Differential Revision: https://reviews.llvm.org/D37822

Removed:
cfe/trunk/test/CodeGen/blocks-opencl.cl
Modified:
cfe/trunk/lib/CodeGen/CGBlocks.cpp
cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp
cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h
cfe/trunk/lib/CodeGen/TargetInfo.h
cfe/trunk/test/CodeGenOpenCL/blocks.cl
cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBlocks.cpp?rev=314932&r1=314931&r2=314932&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBlocks.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBlocks.cpp Wed Oct  4 13:32:17 2017
@@ -14,11 +14,13 @@
 #include "CGBlocks.h"
 #include "CGDebugInfo.h"
 #include "CGObjCRuntime.h"
+#include "CGOpenCLRuntime.h"
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
 #include "ConstantEmitter.h"
-#include "clang/CodeGen/ConstantInitBuilder.h"
+#include "TargetInfo.h"
 #include "clang/AST/DeclObjC.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/DataLayout.h"
@@ -302,21 +304,55 @@ static CharUnits getLowBit(CharUnits v)
 
 static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info,
  SmallVectorImpl &elementTypes) {
-  // The header is basically 'struct { void *; int; int; void *; void *; }'.
-  // Assert that that struct is packed.
-  assert(CGM.getIntSize() <= CGM.getPointerSize());
-  assert(CGM.getIntAlign() <= CGM.getPointerAlign());
-  assert((2 * CGM.getIntSize()).isMultipleOf(CGM.getPointerAlign()));
-
-  info.BlockAlign = CGM.getPointerAlign();
-  info.BlockSize = 3 * CGM.getPointerSize() + 2 * CGM.getIntSize();
 
   assert(elementTypes.empty());
-  elementTypes.push_back(CGM.VoidPtrTy);
-  elementTypes.push_back(CGM.IntTy);
-  elementTypes.push_back(CGM.IntTy);
-  elementTypes.push_back(CGM.VoidPtrTy);
-  elementTypes.push_back(CGM.getBlockDescriptorType());
+  if (CGM.getLangOpts().OpenCL) {
+// The header is basically 'struct { int; int; generic void *;
+// custom_fields; }'. Assert that struct is packed.
+auto GenPtrAlign = CharUnits::fromQuantity(
+CGM.getTarget().getPointerAlign(LangAS::opencl_generic) / 8);
+auto GenPtrSize = CharUnits::fromQuantity(
+CGM.getTarget().getPointerWidth(LangAS::opencl_generic) / 8);
+assert(CGM.getIntSize() <= GenPtrSize);
+assert(CGM.getIntAlign() <= GenPtrAlign);
+assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign));
+elementTypes.push_back(CGM.IntTy); /* total size */
+elementTypes.push_back(CGM.IntTy); /* align */
+elementTypes.push_back(
+CGM.getOpenCLRuntime()
+.getGenericVoidPointerType()); /* invoke function */
+unsigned Offset =
+2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity();
+unsigned BlockAlign = GenPtrAlign.getQuantity();
+if (auto *Helper =
+CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+  for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ {
+// TargetOpenCLBlockHelp needs to make sure the struct is packed.
+// If necessary, add padding fields to the custom fields.
+unsigned Align = CGM.getDataLayout().getABITypeAlignment(I);
+if (BlockAlign < Align)
+  BlockAlign = Align;
+assert(Offset % Align == 0);
+Offset += CGM.getDataLayout().getTypeAllocSize(I);
+elementTypes.push_back(I);
+  }
+}
+info.BlockAlign = CharUnits::fromQuantity(BlockAlign);
+info.BlockSize = CharUnits::fromQuantity(Offset);
+  } else {
+// The header

r315668 - [OpenCL] Add LangAS::opencl_private to represent private address space in AST

2017-10-12 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu Oct 12 20:37:48 2017
New Revision: 315668

URL: http://llvm.org/viewvc/llvm-project?rev=315668&view=rev
Log:
[OpenCL] Add LangAS::opencl_private to represent private address space in AST

Currently Clang uses default address space (0) to represent private address 
space for OpenCL
in AST. There are two issues with this:

Multiple address spaces including private address space cannot be diagnosed.
There is no mangling for default address space. For example, if private int* is 
emitted as
i32 addrspace(5)* in IR, it is supposed to be mangled as PUAS5i, but it is 
mangled as
Pi instead.

This patch attempts to represent OpenCL private address space explicitly in 
AST. It adds
a new enum LangAS::opencl_private and adds it to the variable types which are 
implicitly
private:

automatic variables without address space qualifier

function parameter

pointee type without address space qualifier (OpenCL 1.2 and below)

Differential Revision: https://reviews.llvm.org/D35082

Removed:
cfe/trunk/test/SemaOpenCL/extern.cl
Modified:
cfe/trunk/include/clang/Basic/AddressSpaces.h
cfe/trunk/lib/AST/ASTContext.cpp
cfe/trunk/lib/AST/Expr.cpp
cfe/trunk/lib/AST/ItaniumMangle.cpp
cfe/trunk/lib/AST/TypePrinter.cpp
cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
cfe/trunk/lib/Basic/Targets/NVPTX.h
cfe/trunk/lib/Basic/Targets/SPIR.h
cfe/trunk/lib/Basic/Targets/TCE.h
cfe/trunk/lib/CodeGen/CGDecl.cpp
cfe/trunk/lib/Sema/SemaChecking.cpp
cfe/trunk/lib/Sema/SemaDecl.cpp
cfe/trunk/lib/Sema/SemaType.cpp
cfe/trunk/test/CodeGenOpenCL/address-spaces-mangling.cl
cfe/trunk/test/CodeGenOpenCL/address-spaces.cl
cfe/trunk/test/SemaOpenCL/address-spaces.cl
cfe/trunk/test/SemaOpenCL/cl20-device-side-enqueue.cl
cfe/trunk/test/SemaOpenCL/storageclass-cl20.cl
cfe/trunk/test/SemaOpenCL/storageclass.cl
cfe/trunk/test/SemaTemplate/address_space-dependent.cpp

Modified: cfe/trunk/include/clang/Basic/AddressSpaces.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/AddressSpaces.h?rev=315668&r1=315667&r2=315668&view=diff
==
--- cfe/trunk/include/clang/Basic/AddressSpaces.h (original)
+++ cfe/trunk/include/clang/Basic/AddressSpaces.h Thu Oct 12 20:37:48 2017
@@ -25,16 +25,17 @@ namespace LangAS {
 ///
 enum ID {
   // The default value 0 is the value used in QualType for the the situation
-  // where there is no address space qualifier. For most languages, this also
-  // corresponds to the situation where there is no address space qualifier in
-  // the source code, except for OpenCL, where the address space value 0 in
-  // QualType represents private address space in OpenCL source code.
+  // where there is no address space qualifier.
   Default = 0,
 
   // OpenCL specific address spaces.
+  // In OpenCL each l-value must have certain non-default address space, each
+  // r-value must have no address space (i.e. the default address space). The
+  // pointee of a pointer must have non-default address space.
   opencl_global,
   opencl_local,
   opencl_constant,
+  opencl_private,
   opencl_generic,
 
   // CUDA specific address spaces.

Modified: cfe/trunk/lib/AST/ASTContext.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ASTContext.cpp?rev=315668&r1=315667&r2=315668&view=diff
==
--- cfe/trunk/lib/AST/ASTContext.cpp (original)
+++ cfe/trunk/lib/AST/ASTContext.cpp Thu Oct 12 20:37:48 2017
@@ -707,6 +707,7 @@ static const LangAS::Map *getAddressSpac
   1, // opencl_global
   3, // opencl_local
   2, // opencl_constant
+  0, // opencl_private
   4, // opencl_generic
   5, // cuda_device
   6, // cuda_constant

Modified: cfe/trunk/lib/AST/Expr.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/Expr.cpp?rev=315668&r1=315667&r2=315668&view=diff
==
--- cfe/trunk/lib/AST/Expr.cpp (original)
+++ cfe/trunk/lib/AST/Expr.cpp Thu Oct 12 20:37:48 2017
@@ -3293,20 +3293,20 @@ Expr::isNullPointerConstant(ASTContext &
   // Check that it is a cast to void*.
   if (const PointerType *PT = CE->getType()->getAs()) {
 QualType Pointee = PT->getPointeeType();
-Qualifiers Q = Pointee.getQualifiers();
-// In OpenCL v2.0 generic address space acts as a placeholder
-// and should be ignored.
-bool IsASValid = true;
-if (Ctx.getLangOpts().OpenCLVersion >= 200) {
-  if (Pointee.getAddressSpace() == LangAS::opencl_generic)
-Q.removeAddressSpace();
-  else
-IsASValid = false;
-}
+// Only (void*)0 or equivalent are treated as nullptr. If pointee type
+// has non-default address space it is not treated as nullptr.
+// (__generic void*)0 in OpenCL 2.0 should not

r315678 - Fix regression of test/CodeGenOpenCL/address-spaces.cl on ppc

2017-10-13 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri Oct 13 06:53:06 2017
New Revision: 315678

URL: http://llvm.org/viewvc/llvm-project?rev=315678&view=rev
Log:
Fix regression of test/CodeGenOpenCL/address-spaces.cl on ppc

Modified:
cfe/trunk/test/CodeGenOpenCL/address-spaces.cl

Modified: cfe/trunk/test/CodeGenOpenCL/address-spaces.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/address-spaces.cl?rev=315678&r1=315677&r2=315678&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/address-spaces.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/address-spaces.cl Fri Oct 13 06:53:06 2017
@@ -102,7 +102,8 @@ void test_struct() {
 // SPIR-LABEL: define void @test_void_par()
 void test_void_par(void) {}
 
-// SPIR-LABEL: define i32 @test_func_return_type()
+// On ppc64 returns signext i32.
+// SPIR-LABEL: define{{.*}} i32 @test_func_return_type()
 int test_func_return_type(void) {
   return 0;
 }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r315804 - [OpenCL] Emit enqueued block as kernel

2017-10-14 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Sat Oct 14 05:23:50 2017
New Revision: 315804

URL: http://llvm.org/viewvc/llvm-project?rev=315804&view=rev
Log:
[OpenCL] Emit enqueued block as kernel

In OpenCL the kernel function and non-kernel function have different calling 
conventions.
For certain targets they have different argument ABIs. Also kernels have 
special function
attributes and metadata for runtime to launch them.

The blocks passed to enqueue_kernel are supposed to be executed as kernels. As 
such,
the block invoke function should be emitted as kernel with proper calling 
convention and
argument ABI.

This patch emits enqueued block as kernel. If a block is both called directly 
and passed
to enqueue_kernel, separate functions will be generated.

Differential Revision: https://reviews.llvm.org/D38134

Added:
cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
Modified:
cfe/trunk/lib/CodeGen/CGBlocks.cpp
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp
cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h
cfe/trunk/lib/CodeGen/CodeGenFunction.h
cfe/trunk/lib/CodeGen/CodeGenTypes.h
cfe/trunk/lib/CodeGen/TargetInfo.cpp
cfe/trunk/lib/CodeGen/TargetInfo.h
cfe/trunk/test/CodeGenOpenCL/blocks.cl
cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBlocks.cpp?rev=315804&r1=315803&r2=315804&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBlocks.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBlocks.cpp Sat Oct 14 05:23:50 2017
@@ -738,16 +738,27 @@ void CodeGenFunction::destroyBlockInfos(
 }
 
 /// Emit a block literal expression in the current function.
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr,
+   llvm::Function **InvokeF) {
   // If the block has no captures, we won't have a pre-computed
   // layout for it.
   if (!blockExpr->getBlockDecl()->hasCaptures()) {
-if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr))
+// The block literal is emitted as a global variable, and the block invoke
+// function has to be extracted from its initializer.
+if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) {
+  if (InvokeF) {
+auto *GV = cast(
+cast(Block)->stripPointerCasts());
+auto *BlockInit = cast(GV->getInitializer());
+*InvokeF = cast(
+BlockInit->getAggregateElement(2)->stripPointerCasts());
+  }
   return Block;
+}
 CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName());
 computeBlockInfo(CGM, this, blockInfo);
 blockInfo.BlockExpression = blockExpr;
-return EmitBlockLiteral(blockInfo);
+return EmitBlockLiteral(blockInfo, InvokeF);
   }
 
   // Find the block info for this block and take ownership of it.
@@ -756,10 +767,11 @@ llvm::Value *CodeGenFunction::EmitBlockL
  blockExpr->getBlockDecl()));
 
   blockInfo->BlockExpression = blockExpr;
-  return EmitBlockLiteral(*blockInfo);
+  return EmitBlockLiteral(*blockInfo, InvokeF);
 }
 
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
+   llvm::Function **InvokeF) {
   bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
   auto GenVoidPtrTy =
   IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : 
VoidPtrTy;
@@ -768,9 +780,11 @@ llvm::Value *CodeGenFunction::EmitBlockL
   CGM.getTarget().getPointerWidth(GenVoidPtrAddr) / 8);
   // Using the computed layout, generate the actual block function.
   bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda();
-  llvm::Constant *blockFn = CodeGenFunction(CGM, true).GenerateBlockFunction(
+  auto *InvokeFn = CodeGenFunction(CGM, true).GenerateBlockFunction(
   CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
-  blockFn = llvm::ConstantExpr::getPointerCast(blockFn, GenVoidPtrTy);
+  if (InvokeF)
+*InvokeF = InvokeFn;
+  auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy);
 
   // If there is nothing to capture, we can emit this as a global block.
   if (blockInfo.CanBeGlobal)

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=315804&r1=315803&r2=315804&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sat Oct 14 05:23:50 2017
@@ -2779,12 +2779,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(
   // The most basic form

r315805 - Fix build failure on android due to missing std::to_string()

2017-10-14 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Sat Oct 14 05:51:52 2017
New Revision: 315805

URL: http://llvm.org/viewvc/llvm-project?rev=315805&view=rev
Log:
Fix build failure on android due to missing std::to_string()

Modified:
cfe/trunk/lib/CodeGen/TargetInfo.cpp

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=315805&r1=315804&r2=315805&view=diff
==
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Sat Oct 14 05:51:52 2017
@@ -25,6 +25,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/raw_ostream.h"
@@ -8994,7 +8995,7 @@ llvm::Function *AMDGPUTargetCodeGenInfo:
 ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*"));
 ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
 ArgNames.push_back(
-llvm::MDString::get(C, std::string("local_arg") + std::to_string(I)));
+llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str()));
   }
   std::string Name = Invoke->getName().str() + "_kernel";
   auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r316000 - CodeGen: Fix invalid bitcasts for atomic builtins

2017-10-17 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Oct 17 07:19:29 2017
New Revision: 316000

URL: http://llvm.org/viewvc/llvm-project?rev=316000&view=rev
Log:
CodeGen: Fix invalid bitcasts for atomic builtins

Currently clang assumes the temporary variables emitted during
codegen of atomic builtins have address space 0, which
is not true for target triple amdgcn---amdgiz and causes invalid
bitcasts.

This patch fixes that.

Differential Revision: https://reviews.llvm.org/D38966

Modified:
cfe/trunk/lib/CodeGen/CGAtomic.cpp
cfe/trunk/test/CodeGenOpenCL/atomic-ops.cl

Modified: cfe/trunk/lib/CodeGen/CGAtomic.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGAtomic.cpp?rev=316000&r1=315999&r2=316000&view=diff
==
--- cfe/trunk/lib/CodeGen/CGAtomic.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGAtomic.cpp Tue Oct 17 07:19:29 2017
@@ -1226,7 +1226,8 @@ RValue CodeGenFunction::EmitAtomicExpr(A
   return RValue::get(nullptr);
 
 return convertTempToRValue(
-Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
+Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo(
+Dest.getAddressSpace())),
 RValTy, E->getExprLoc());
   }
 
@@ -1298,7 +1299,8 @@ RValue CodeGenFunction::EmitAtomicExpr(A
 
   assert(Atomics.getValueSizeInBits() <= Atomics.getAtomicSizeInBits());
   return convertTempToRValue(
-  Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
+  Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo(
+  Dest.getAddressSpace())),
   RValTy, E->getExprLoc());
 }
 

Modified: cfe/trunk/test/CodeGenOpenCL/atomic-ops.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/atomic-ops.cl?rev=316000&r1=315999&r2=316000&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/atomic-ops.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/atomic-ops.cl Tue Oct 17 07:19:29 2017
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - 
-triple=amdgcn-amd-amdhsa-opencl | opt -instnamer -S | FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - 
-triple=amdgcn-amd-amdhsa-amdgizcl | opt -instnamer -S | FileCheck %s
 
 // Also test serialization of atomic operations here, to avoid duplicating the 
test.
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t 
-triple=amdgcn-amd-amdhsa-opencl
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 
-triple=amdgcn-amd-amdhsa-opencl -emit-llvm -o - | opt -instnamer -S | 
FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t 
-triple=amdgcn-amd-amdhsa-amdgizcl
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 
-triple=amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | opt -instnamer -S | 
FileCheck %s
 
 #ifndef ALREADY_INCLUDED
 #define ALREADY_INCLUDED
@@ -32,22 +32,22 @@ atomic_int j;
 
 void fi1(atomic_int *i) {
   // CHECK-LABEL: @fi1
-  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} 
syncscope("workgroup") seq_cst
+  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") 
seq_cst
   int x = __opencl_atomic_load(i, memory_order_seq_cst, 
memory_scope_work_group);
 
-  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} 
syncscope("agent") seq_cst
+  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("agent") 
seq_cst
   x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device);
 
-  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} seq_cst
+  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst
   x = __opencl_atomic_load(i, memory_order_seq_cst, 
memory_scope_all_svm_devices);
 
-  // CHECK: load atomic i32, i32 addrspace(4)* %{{[.0-9A-Z_a-z]+}} 
syncscope("subgroup") seq_cst
+  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") 
seq_cst
   x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
 }
 
 void fi2(atomic_int *i) {
   // CHECK-LABEL: @fi2
-  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(4)* 
%{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
+  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} 
syncscope("workgroup") seq_cst
   __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
 }
 
@@ -56,7 +56,7 @@ void test_addr(global atomic_int *ig, pr
   // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* 
%{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
   __opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group);
 
-  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} 
syncscope("workgroup") seq_cst
+  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* 
%{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst
   __opencl_atomic

r316165 - [AMDGPU] Fix bug in enqueued block codegen due to an extra line

2017-10-19 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu Oct 19 08:56:13 2017
New Revision: 316165

URL: http://llvm.org/viewvc/llvm-project?rev=316165&view=rev
Log:
[AMDGPU] Fix bug in enqueued block codegen due to an extra line

Modified:
cfe/trunk/lib/CodeGen/TargetInfo.cpp
cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=316165&r1=316164&r2=316165&view=diff
==
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Thu Oct 19 08:56:13 2017
@@ -8985,7 +8985,6 @@ llvm::Function *AMDGPUTargetCodeGenInfo:
   ArgNames.push_back(llvm::MDString::get(C, "block_literal"));
   for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {
 ArgTys.push_back(InvokeFT->getParamType(I));
-ArgTys.push_back(BlockTy);
 ArgTypeNames.push_back(llvm::MDString::get(C, "void*"));
 AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3)));
 AccessQuals.push_back(llvm::MDString::get(C, "none"));

Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl?rev=316165&r1=316164&r2=316165&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl Thu Oct 19 08:56:13 
2017
@@ -18,6 +18,12 @@ kernel void test(global char *a, char b,
  a[0] = b;
  c[0] = d;
  });
+  enqueue_kernel(default_queue, flags, ndrange,
+ ^(local void *lp) {
+ a[0] = b;
+ c[0] = d;
+ ((local int*)lp)[0] = 1;
+ }, 100);
 }
 
 // CHECK-LABEL: define internal amdgpu_kernel void 
@__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 
}>)
@@ -33,4 +39,7 @@ kernel void test(global char *a, char b,
 // CHECK-LABEL: define internal amdgpu_kernel void 
@__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, 
i64 addrspace(1)*, i64, i8 }>)
 // CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} 
!kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type 
!{{.*}} !kernel_arg_type_qual !{{.*}}
 
+// CHECK-LABEL: define internal amdgpu_kernel void 
@__test_block_invoke_3_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, 
i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*)
+// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} 
!kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type 
!{{.*}} !kernel_arg_type_qual !{{.*}}
+
 // CHECK: attributes #[[ATTR]] = { nounwind "enqueued-block" }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r316353 - CodeGen: Fix invalid bitcast in partial initialization of automatic array variable

2017-10-23 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Mon Oct 23 10:49:26 2017
New Revision: 316353

URL: http://llvm.org/viewvc/llvm-project?rev=316353&view=rev
Log:
CodeGen: Fix invalid bitcast in partial initialization of automatic array 
variable

Differential Revision: https://reviews.llvm.org/D39184

Modified:
cfe/trunk/lib/CodeGen/CGDecl.cpp
cfe/trunk/test/CodeGenOpenCL/amdgcn-automatic-variable.cl

Modified: cfe/trunk/lib/CodeGen/CGDecl.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGDecl.cpp?rev=316353&r1=316352&r2=316353&view=diff
==
--- cfe/trunk/lib/CodeGen/CGDecl.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGDecl.cpp Mon Oct 23 10:49:26 2017
@@ -1266,7 +1266,7 @@ void CodeGenFunction::EmitAutoVarInit(co
 llvm::ConstantInt::get(IntPtrTy,

getContext().getTypeSizeInChars(type).getQuantity());
 
-  llvm::Type *BP = Int8PtrTy;
+  llvm::Type *BP = AllocaInt8PtrTy;
   if (Loc.getType() != BP)
 Loc = Builder.CreateBitCast(Loc, BP);
 

Modified: cfe/trunk/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgcn-automatic-variable.cl?rev=316353&r1=316352&r2=316353&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/amdgcn-automatic-variable.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/amdgcn-automatic-variable.cl Mon Oct 23 
10:49:26 2017
@@ -58,3 +58,11 @@ void func2(void) {
   const int lvc = 4;
   lv1 = lvc;
 }
+
+// CHECK-LABEL: define void @func3()
+// CHECK: %a = alloca [16 x [1 x float]], align 4, addrspace(5)
+// CHECK: %[[CAST:.+]] = bitcast [16 x [1 x float]] addrspace(5)* %a to i8 
addrspace(5)*
+// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* %[[CAST]], i8 0, 
i64 64, i32 4, i1 false)
+void func3(void) {
+  float a[16][1] = {{0.}};
+}


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r316484 - CodeGen: Fix missing debug loc due to alloca

2017-10-24 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Oct 24 12:14:43 2017
New Revision: 316484

URL: http://llvm.org/viewvc/llvm-project?rev=316484&view=rev
Log:
CodeGen: Fix missing debug loc due to alloca

Builder saves/restores the insertion point when emitting addr space cast
for alloca, but does not save/restore debug loc, which causes verifier
failure for certain call instructions.

This patch fixes that.

Differential Revision: https://reviews.llvm.org/D39069

Added:
cfe/trunk/test/CodeGenOpenCL/func-call-dbg-loc.cl
Modified:
cfe/trunk/lib/CodeGen/CGExpr.cpp

Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=316484&r1=316483&r2=316484&view=diff
==
--- cfe/trunk/lib/CodeGen/CGExpr.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExpr.cpp Tue Oct 24 12:14:43 2017
@@ -74,12 +74,11 @@ Address CodeGenFunction::CreateTempAlloc
   // cast alloca to the default address space when necessary.
   if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) 
{
 auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default);
-auto CurIP = Builder.saveIP();
+llvm::IRBuilderBase::InsertPointGuard IPG(Builder);
 Builder.SetInsertPoint(AllocaInsertPt);
 V = getTargetHooks().performAddrSpaceCast(
 *this, V, getASTAllocaAddressSpace(), LangAS::Default,
 Ty->getPointerTo(DestAddrSpace), /*non-null*/ true);
-Builder.restoreIP(CurIP);
   }
 
   return Address(V, Align);

Added: cfe/trunk/test/CodeGenOpenCL/func-call-dbg-loc.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/func-call-dbg-loc.cl?rev=316484&view=auto
==
--- cfe/trunk/test/CodeGenOpenCL/func-call-dbg-loc.cl (added)
+++ cfe/trunk/test/CodeGenOpenCL/func-call-dbg-loc.cl Tue Oct 24 12:14:43 2017
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple amdgcn---amdgizcl -debug-info-kind=limited -O0 
-emit-llvm -o - %s | FileCheck %s
+
+typedef struct
+{
+int a;
+} Struct;
+
+Struct func1();
+
+void func2(Struct S);
+
+void func3()
+{
+// CHECK: call i32 @func1() #{{[0-9]+}}, !dbg ![[LOC:[0-9]+]]
+// CHECK: call void @func2(i32 %{{[0-9]+}}) #{{[0-9]+}}, !dbg ![[LOC]]
+func2(func1());
+}
+


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r340873 - [HIP] Fix output file extension

2018-08-28 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Aug 28 14:09:09 2018
New Revision: 340873

URL: http://llvm.org/viewvc/llvm-project?rev=340873&view=rev
Log:
[HIP] Fix output file extension

OffloadBundlingJobAction constructor accepts a list of JobAction as inputs.
The host JobAction is the last one. The file type of OffloadBundlingJobAction
should be determined by the host JobAction (the last one) instead of the first
one.

Since HIP emits LLVM bitcode for device compilation, the device JobAction has
a different file type from the host JobAction. This bug causes an incorrect
output file extension for HIP.

This patch fixes it by using the last input JobAction (host JobAction) to 
determine
file type of OffloadBundlingJobAction.

Differential Revision: https://reviews.llvm.org/D51336

Added:
cfe/trunk/test/Driver/hip-output-file-name.hip
Modified:
cfe/trunk/lib/Driver/Action.cpp

Modified: cfe/trunk/lib/Driver/Action.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Action.cpp?rev=340873&r1=340872&r2=340873&view=diff
==
--- cfe/trunk/lib/Driver/Action.cpp (original)
+++ cfe/trunk/lib/Driver/Action.cpp Tue Aug 28 14:09:09 2018
@@ -382,7 +382,7 @@ VerifyPCHJobAction::VerifyPCHJobAction(A
 void OffloadBundlingJobAction::anchor() {}
 
 OffloadBundlingJobAction::OffloadBundlingJobAction(ActionList &Inputs)
-: JobAction(OffloadBundlingJobClass, Inputs, Inputs.front()->getType()) {}
+: JobAction(OffloadBundlingJobClass, Inputs, Inputs.back()->getType()) {}
 
 void OffloadUnbundlingJobAction::anchor() {}
 

Added: cfe/trunk/test/Driver/hip-output-file-name.hip
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-output-file-name.hip?rev=340873&view=auto
==
--- cfe/trunk/test/Driver/hip-output-file-name.hip (added)
+++ cfe/trunk/test/Driver/hip-output-file-name.hip Tue Aug 28 14:09:09 2018
@@ -0,0 +1,9 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -c -target x86_64-linux-gnu \
+// RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
+// RUN: 2>&1 | FileCheck %s
+
+// CHECK: {{.*}}clang-offload-bundler{{.*}}"-outputs=hip-output-file-name.o"


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r340967 - Add predefined macro __gnu_linux__ for proper aux-triple

2018-08-29 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Aug 29 13:39:22 2018
New Revision: 340967

URL: http://llvm.org/viewvc/llvm-project?rev=340967&view=rev
Log:
Add predefined macro __gnu_linux__ for proper aux-triple

Clang predefines macro __linux__ for aux-triple with Linux OS
but does not predefine macro __gnu_linux__. This causes
compilation errors for certain applications, e.g. Eigen.

This patch fixes that.

Differential Revision: https://reviews.llvm.org/D51441

Modified:
cfe/trunk/lib/Frontend/InitPreprocessor.cpp
cfe/trunk/test/Preprocessor/predefined-macros.c

Modified: cfe/trunk/lib/Frontend/InitPreprocessor.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/InitPreprocessor.cpp?rev=340967&r1=340966&r2=340967&view=diff
==
--- cfe/trunk/lib/Frontend/InitPreprocessor.cpp (original)
+++ cfe/trunk/lib/Frontend/InitPreprocessor.cpp Wed Aug 29 13:39:22 2018
@@ -1128,6 +1128,7 @@ static void InitializePredefinedAuxMacro
   if (AuxTriple.getOS() == llvm::Triple::Linux) {
 Builder.defineMacro("__ELF__");
 Builder.defineMacro("__linux__");
+Builder.defineMacro("__gnu_linux__");
 // Used in features.h. If this is omitted, math.h doesn't declare float
 // versions of the functions in bits/mathcalls.h.
 if (LangOpts.CPlusPlus)

Modified: cfe/trunk/test/Preprocessor/predefined-macros.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/predefined-macros.c?rev=340967&r1=340966&r2=340967&view=diff
==
--- cfe/trunk/test/Preprocessor/predefined-macros.c (original)
+++ cfe/trunk/test/Preprocessor/predefined-macros.c Wed Aug 29 13:39:22 2018
@@ -183,9 +183,11 @@
 // CHECK-HIP: #define __HIP__ 1
 
 // RUN: %clang_cc1 %s -E -dM -o - -x hip -triple amdgcn-amd-amdhsa \
-// RUN:   -fcuda-is-device \
+// RUN:   -aux-triple x86_64-unknown-linux -fcuda-is-device \
 // RUN:   | FileCheck -match-full-lines %s --check-prefix=CHECK-HIP-DEV
 // CHECK-HIP-DEV-NOT: #define __CUDA_ARCH__
 // CHECK-HIP-DEV: #define __HIPCC__ 1
 // CHECK-HIP-DEV: #define __HIP_DEVICE_COMPILE__ 1
 // CHECK-HIP-DEV: #define __HIP__ 1
+// CHECK_HIP-DEV: #define __linux__ 1
+// CHECK_HIP-DEV: #define __gnu_linux__ 1


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r341077 - [HIP] Add -fvisibility hidden option to clang

2018-08-30 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu Aug 30 08:10:20 2018
New Revision: 341077

URL: http://llvm.org/viewvc/llvm-project?rev=341077&view=rev
Log:
[HIP] Add -fvisibility hidden option to clang

AMDGPU target needs the -fvisibility hidden option for clang to
work around a limitation of no PLT support, otherwise there is a compilation
error at -O0.

Differential Revision: https://reviews.llvm.org/D51434

Modified:
cfe/trunk/lib/Driver/ToolChains/HIP.cpp
cfe/trunk/test/Driver/hip-toolchain.hip

Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/HIP.cpp?rev=341077&r1=341076&r2=341077&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/HIP.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/HIP.cpp Thu Aug 30 08:10:20 2018
@@ -247,6 +247,12 @@ void HIPToolChain::addClangTargetOptions
   if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc,
  false))
 CC1Args.push_back("-fcuda-rdc");
+
+  // Default to "hidden" visibility, as object level linking will not be
+  // supported for the foreseeable future.
+  if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
+ options::OPT_fvisibility_ms_compat))
+CC1Args.append({"-fvisibility", "hidden"});
 }
 
 llvm::opt::DerivedArgList *

Modified: cfe/trunk/test/Driver/hip-toolchain.hip
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-toolchain.hip?rev=341077&r1=341076&r2=341077&view=diff
==
--- cfe/trunk/test/Driver/hip-toolchain.hip (original)
+++ cfe/trunk/test/Driver/hip-toolchain.hip Thu Aug 30 08:10:20 2018
@@ -15,13 +15,15 @@
 // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" 
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc"
 // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803"
-// CHECK-SAME: "-fcuda-is-device" {{.*}} "-o" [[A_BC:".*bc"]] "-x" "hip"
+// CHECK-SAME: "-fcuda-is-device" "-fvisibility" "hidden"
+// CHECK-SAME: {{.*}} "-o" [[A_BC:".*bc"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]]
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc"
 // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803"
-// CHECK-SAME: "-fcuda-is-device" {{.*}} "-o" [[B_BC:".*bc"]] "-x" "hip"
+// CHECK-SAME: "-fcuda-is-device" "-fvisibility" "hidden"
+// CHECK-SAME: {{.*}} "-o" [[B_BC:".*bc"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]]
 
 // CHECK: [[LLVM_LINK:"*.llvm-link"]] [[A_BC]] [[B_BC]]


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r337540 - Sema: Fix explicit address space cast in C++

2018-07-20 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri Jul 20 04:32:51 2018
New Revision: 337540

URL: http://llvm.org/viewvc/llvm-project?rev=337540&view=rev
Log:
Sema: Fix explicit address space cast in C++

Currently clang does not allow implicit cast of a pointer to a pointer type
in different address space but allows C-style cast of a pointer to a pointer
type in different address space. However, there is a bug in Sema causing
incorrect Cast Expr in AST for the latter case, which in turn results in
invalid LLVM IR in codegen.

This is because Sema::IsQualificationConversion returns true for a cast of
pointer to a pointer type in different address space, which in turn allows
a standard conversion and results in a cast expression with no op in AST.

This patch fixes that by letting Sema::IsQualificationConversion return false
for a cast of pointer to a pointer type in different address space, which
in turn disallows standard conversion, implicit cast, and static cast.
Finally it results in a reinterpret cast and the correct conversion kind is set.

Differential Revision: https://reviews.llvm.org/D49294

Added:
cfe/trunk/test/CodeGenCXX/address-space-cast.cpp
Modified:
cfe/trunk/lib/Sema/SemaCast.cpp
cfe/trunk/lib/Sema/SemaOverload.cpp

Modified: cfe/trunk/lib/Sema/SemaCast.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaCast.cpp?rev=337540&r1=337539&r2=337540&view=diff
==
--- cfe/trunk/lib/Sema/SemaCast.cpp (original)
+++ cfe/trunk/lib/Sema/SemaCast.cpp Fri Jul 20 04:32:51 2018
@@ -1955,6 +1955,12 @@ static bool fixOverloadedReinterpretCast
   return Result.isUsable();
 }
 
+static bool IsAddressSpaceConversion(QualType SrcType, QualType DestType) {
+  return SrcType->isPointerType() && DestType->isPointerType() &&
+ SrcType->getAs<PointerType>()->getPointeeType().getAddressSpace() !=
+ DestType->getAs<PointerType>()->getPointeeType().getAddressSpace();
+}
+
 static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr,
 QualType DestType, bool CStyle,
 SourceRange OpRange,
@@ -2198,6 +2204,8 @@ static TryCastResult TryReinterpretCast(
 } else {
   Kind = CK_BitCast;
 }
+  } else if (IsAddressSpaceConversion(SrcType, DestType)) {
+Kind = CK_AddressSpaceConversion;
   } else {
 Kind = CK_BitCast;
   }

Modified: cfe/trunk/lib/Sema/SemaOverload.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaOverload.cpp?rev=337540&r1=337539&r2=337540&view=diff
==
--- cfe/trunk/lib/Sema/SemaOverload.cpp (original)
+++ cfe/trunk/lib/Sema/SemaOverload.cpp Fri Jul 20 04:32:51 2018
@@ -3150,6 +3150,15 @@ Sema::IsQualificationConversion(QualType
   = PreviousToQualsIncludeConst && ToQuals.hasConst();
   }
 
+  // Allows address space promotion by language rules implemented in
+  // Type::Qualifiers::isAddressSpaceSupersetOf.
+  Qualifiers FromQuals = FromType.getQualifiers();
+  Qualifiers ToQuals = ToType.getQualifiers();
+  if (!ToQuals.isAddressSpaceSupersetOf(FromQuals) &&
+  !FromQuals.isAddressSpaceSupersetOf(ToQuals)) {
+return false;
+  }
+
   // We are left with FromType and ToType being the pointee types
   // after unwrapping the original FromType and ToType the same number
   // of types. If we unwrapped any pointers, and if FromType and

Added: cfe/trunk/test/CodeGenCXX/address-space-cast.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/address-space-cast.cpp?rev=337540&view=auto
==
--- cfe/trunk/test/CodeGenCXX/address-space-cast.cpp (added)
+++ cfe/trunk/test/CodeGenCXX/address-space-cast.cpp Fri Jul 20 04:32:51 2018
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 %s -triple=amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s
+
+#define __private__ __attribute__((address_space(5)))
+
+void func_pchar(__private__ char *x);
+
+void test_cast(char *gen_ptr) {
+  // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i8 addrspace(5)*
+  // CHECK-NEXT: store i8 addrspace(5)* %[[cast]]
+  __private__ char *priv_ptr = (__private__ char *)gen_ptr;
+
+  // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i8 addrspace(5)*
+  // CHECK-NEXT: call void @_Z10func_pcharPU3AS5c(i8 addrspace(5)* %[[cast]])
+  func_pchar((__private__ char *)gen_ptr);
+}


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r337631 - [HIP] Register/unregister device fat binary only once

2018-07-20 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri Jul 20 15:45:24 2018
New Revision: 337631

URL: http://llvm.org/viewvc/llvm-project?rev=337631&view=rev
Log:
[HIP] Register/unregister device fat binary only once

HIP generates one fat binary for all devices after linking. However, for each
compilation unit a ctor function is emitted which registers the same fat binary.
Measures need to be taken to make sure the fat binary is only registered once.

Currently each ctor function calls __hipRegisterFatBinary and stores the 
returned value
to __hip_gpubin_handle. This patch changes the linkage of __hip_gpubin_handle 
to be linkonce
so that they are shared between LLVM modules. Then this patch adds check of 
value of
__hip_gpubin_handle to make sure __hipRegisterFatBinary is only called once. 
The code
is equivalent to

void *__hip_gpubin_handle;
void ctor() {
  if (__hip_gpubin_handle == 0) {
__hip_gpubin_handle = __hipRegisterFatBinary(...);
  }
  // register kernels and variables.
}
The patch also does similar change to dtors so that __hipUnregisterFatBinary
is called once.

Differential Revision: https://reviews.llvm.org/D49083

Modified:
cfe/trunk/lib/CodeGen/CGCUDANV.cpp
cfe/trunk/test/CodeGenCUDA/device-stub.cu

Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=337631&r1=337630&r2=337631&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Fri Jul 20 15:45:24 2018
@@ -309,12 +309,24 @@ llvm::Function *CGNVCUDARuntime::makeReg
 }
 
 /// Creates a global constructor function for the module:
+///
+/// For CUDA:
 /// \code
 /// void __cuda_module_ctor(void*) {
 /// Handle = __cudaRegisterFatBinary(GpuBinaryBlob);
 /// __cuda_register_globals(Handle);
 /// }
 /// \endcode
+///
+/// For HIP:
+/// \code
+/// void __hip_module_ctor(void*) {
+/// if (__hip_gpubin_handle == 0) {
+/// __hip_gpubin_handle  = __hipRegisterFatBinary(GpuBinaryBlob);
+/// __hip_register_globals(__hip_gpubin_handle);
+/// }
+/// }
+/// \endcode
 llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
   bool IsHIP = CGM.getLangOpts().HIP;
   // No need to generate ctors/dtors if there is no GPU binary.
@@ -427,22 +439,68 @@ llvm::Function *CGNVCUDARuntime::makeMod
   /*constant*/ true);
   FatbinWrapper->setSection(FatbinSectionName);
 
-  // Register binary with CUDA/HIP runtime. This is substantially different in
-  // default mode vs. separate compilation!
-  if (!RelocatableDeviceCode) {
-// GpuBinaryHandle = __{cuda|hip}RegisterFatBinary(&FatbinWrapper);
+  // There is only one HIP fat binary per linked module, however there are
+  // multiple constructor functions. Make sure the fat binary is registered
+  // only once. The constructor functions are executed by the dynamic loader
+  // before the program gains control. The dynamic loader cannot execute the
+  // constructor functions concurrently since doing that would not guarantee
+  // thread safety of the loaded program. Therefore we can assume sequential
+  // execution of constructor functions here.
+  if (IsHIP) {
+llvm::BasicBlock *IfBlock =
+llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc);
+llvm::BasicBlock *ExitBlock =
+llvm::BasicBlock::Create(Context, "exit", ModuleCtorFunc);
+// The name, size, and initialization pattern of this variable is part
+// of HIP ABI.
+GpuBinaryHandle = new llvm::GlobalVariable(
+TheModule, VoidPtrPtrTy, /*isConstant=*/false,
+llvm::GlobalValue::LinkOnceAnyLinkage,
+/*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
+"__hip_gpubin_handle");
+GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity());
+Address GpuBinaryAddr(
+GpuBinaryHandle,
+CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));
+{
+  auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
+  llvm::Constant *Zero =
+  llvm::Constant::getNullValue(HandleValue->getType());
+  llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue, Zero);
+  CtorBuilder.CreateCondBr(EQZero, IfBlock, ExitBlock);
+}
+{
+  CtorBuilder.SetInsertPoint(IfBlock);
+  // GpuBinaryHandle = __hipRegisterFatBinary(&FatbinWrapper);
+  llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
+  RegisterFatbinFunc,
+  CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
+  CtorBuilder.CreateStore(RegisterFatbinCall, GpuBinaryAddr);
+  CtorBuilder.CreateBr(ExitBlock);
+}
+{
+  CtorBuilder.SetInsertPoint(ExitBlock);
+  // Call __hip_register_globals(GpuBinaryHandle);
+  if (RegisterGlobalsFunc) {
+auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
+CtorBuilder.CreateCall(RegisterGlobalsFunc, HandleValue);
+  }
+}
+  } else if (!Relocat

r337639 - [HIP] Support -fcuda-flush-denormals-to-zero for amdgcn

2018-07-20 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri Jul 20 19:02:22 2018
New Revision: 337639

URL: http://llvm.org/viewvc/llvm-project?rev=337639&view=rev
Log:
[HIP] Support -fcuda-flush-denormals-to-zero for amdgcn

Differential Revision: https://reviews.llvm.org/D48287

Modified:
cfe/trunk/include/clang/Basic/LangOptions.def
cfe/trunk/lib/CodeGen/CGCall.cpp
cfe/trunk/lib/CodeGen/CodeGenModule.cpp
cfe/trunk/lib/Frontend/CompilerInvocation.cpp
cfe/trunk/test/CodeGenCUDA/flush-denormals.cu

Modified: cfe/trunk/include/clang/Basic/LangOptions.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/LangOptions.def?rev=337639&r1=337638&r2=337639&view=diff
==
--- cfe/trunk/include/clang/Basic/LangOptions.def (original)
+++ cfe/trunk/include/clang/Basic/LangOptions.def Fri Jul 20 19:02:22 2018
@@ -209,7 +209,6 @@ LANGOPT(RenderScript  , 1, 0, "Rende
 LANGOPT(CUDAIsDevice  , 1, 0, "compiling for CUDA device")
 LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "allowing variadic functions in CUDA 
device code")
 LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr 
functions as __host__ __device__")
-LANGOPT(CUDADeviceFlushDenormalsToZero, 1, 0, "flushing denormals to zero")
 LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate 
transcendental functions")
 LANGOPT(CUDARelocatableDeviceCode, 1, 0, "generate relocatable device code")
 

Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=337639&r1=337638&r2=337639&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Fri Jul 20 19:02:22 2018
@@ -1800,7 +1800,7 @@ void CodeGenModule::ConstructDefaultFnAt
 FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
 
 // Respect -fcuda-flush-denormals-to-zero.
-if (getLangOpts().CUDADeviceFlushDenormalsToZero)
+if (CodeGenOpts.FlushDenorm)
   FuncAttrs.addAttribute("nvptx-f32ftz", "true");
   }
 }

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=337639&r1=337638&r2=337639&view=diff
==
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Fri Jul 20 19:02:22 2018
@@ -526,7 +526,7 @@ void CodeGenModule::Release() {
 // floating point values to 0.  (This corresponds to its "__CUDA_FTZ"
 // property.)
 getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz",
-  LangOpts.CUDADeviceFlushDenormalsToZero ? 1 : 0);
+  CodeGenOpts.FlushDenorm ? 1 : 0);
   }
 
   // Emit OpenCL specific module metadata: OpenCL/SPIR version.

Modified: cfe/trunk/lib/Frontend/CompilerInvocation.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/CompilerInvocation.cpp?rev=337639&r1=337638&r2=337639&view=diff
==
--- cfe/trunk/lib/Frontend/CompilerInvocation.cpp (original)
+++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp Fri Jul 20 19:02:22 2018
@@ -690,7 +690,9 @@ static bool ParseCodeGenArgs(CodeGenOpti
 Args.hasArg(OPT_cl_unsafe_math_optimizations) ||
 Args.hasArg(OPT_cl_fast_relaxed_math));
   Opts.Reassociate = Args.hasArg(OPT_mreassociate);
-  Opts.FlushDenorm = Args.hasArg(OPT_cl_denorms_are_zero);
+  Opts.FlushDenorm = Args.hasArg(OPT_cl_denorms_are_zero) ||
+ (Args.hasArg(OPT_fcuda_is_device) &&
+  Args.hasArg(OPT_fcuda_flush_denormals_to_zero));
   Opts.CorrectlyRoundedDivSqrt =
   Args.hasArg(OPT_cl_fp32_correctly_rounded_divide_sqrt);
   Opts.UniformWGSize =
@@ -2191,9 +2193,6 @@ static void ParseLangArgs(LangOptions &O
   if (Args.hasArg(OPT_fno_cuda_host_device_constexpr))
 Opts.CUDAHostDeviceConstexpr = 0;
 
-  if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_flush_denormals_to_zero))
-Opts.CUDADeviceFlushDenormalsToZero = 1;
-
   if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_approx_transcendentals))
 Opts.CUDADeviceApproxTranscendentals = 1;
 

Modified: cfe/trunk/test/CodeGenCUDA/flush-denormals.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/flush-denormals.cu?rev=337639&r1=337638&r2=337639&view=diff
==
--- cfe/trunk/test/CodeGenCUDA/flush-denormals.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/flush-denormals.cu Fri Jul 20 19:02:22 2018
@@ -5,6 +5,13 @@
 // RUN:   -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
 // RUN:   FileCheck %s -check-prefix CHECK -check-prefix FTZ
 
+// RUN: %clang_cc1 -fcuda-is-device -x hip \
+// RUN:   -triple

r337791 - Enable .hip files for test/Driver

2018-07-23 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Mon Jul 23 18:03:44 2018
New Revision: 337791

URL: http://llvm.org/viewvc/llvm-project?rev=337791&view=rev
Log:
Enable .hip files for test/Driver

Partially revert r334128 due to regressions.

Modified:
cfe/trunk/lib/Driver/Driver.cpp
cfe/trunk/test/Driver/lit.local.cfg

Modified: cfe/trunk/lib/Driver/Driver.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=337791&r1=337790&r2=337791&view=diff
==
--- cfe/trunk/lib/Driver/Driver.cpp (original)
+++ cfe/trunk/lib/Driver/Driver.cpp Mon Jul 23 18:03:44 2018
@@ -2809,7 +2809,7 @@ public:
   C.MakeAction<OffloadUnbundlingJobAction>(HostAction);
   UnbundlingHostAction->registerDependentActionInfo(
   C.getSingleOffloadToolChain<Action::OFK_Host>(),
-  /*BoundArch=*/"all", Action::OFK_Host);
+  /*BoundArch=*/StringRef(), Action::OFK_Host);
   HostAction = UnbundlingHostAction;
 }
 
@@ -3868,7 +3868,7 @@ InputInfo Driver::BuildJobsForActionNoCa
   StringRef Arch;
   if (TargetDeviceOffloadKind == Action::OFK_HIP) {
 if (UI.DependentOffloadKind == Action::OFK_Host)
-  Arch = "all";
+  Arch = StringRef();
 else
   Arch = UI.DependentBoundArch;
   } else

Modified: cfe/trunk/test/Driver/lit.local.cfg
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/lit.local.cfg?rev=337791&r1=337790&r2=337791&view=diff
==
--- cfe/trunk/test/Driver/lit.local.cfg (original)
+++ cfe/trunk/test/Driver/lit.local.cfg Mon Jul 23 18:03:44 2018
@@ -1,5 +1,5 @@
 config.suffixes = ['.c', '.cpp', '.h', '.m', '.mm', '.S', '.s', '.f90', '.f95',
-   '.cu', '.rs', '.cl']
+   '.cu', '.rs', '.cl', '.hip']
 config.substitutions = list(config.substitutions)
 config.substitutions.insert(0,
 ('%clang_cc1',


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r337793 - [HIP] pass -target-cpu when running the device-mode compiler

2018-07-23 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Mon Jul 23 18:40:44 2018
New Revision: 337793

URL: http://llvm.org/viewvc/llvm-project?rev=337793&view=rev
Log:
[HIP] pass -target-cpu when running the device-mode compiler

Differential Revision: https://reviews.llvm.org/D49643

Modified:
cfe/trunk/lib/Driver/ToolChains/HIP.cpp
cfe/trunk/test/Driver/hip-toolchain.hip

Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/HIP.cpp?rev=337793&r1=337792&r2=337793&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/HIP.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/HIP.cpp Mon Jul 23 18:40:44 2018
@@ -232,6 +232,8 @@ void HIPToolChain::addClangTargetOptions
   assert(DeviceOffloadingKind == Action::OFK_HIP &&
  "Only HIP offloading kinds are supported for GPUs.");
 
+  CC1Args.push_back("-target-cpu");
+  CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch));
   CC1Args.push_back("-fcuda-is-device");
 
   if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,

Modified: cfe/trunk/test/Driver/hip-toolchain.hip
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-toolchain.hip?rev=337793&r1=337792&r2=337793&view=diff
==
--- cfe/trunk/test/Driver/hip-toolchain.hip (original)
+++ cfe/trunk/test/Driver/hip-toolchain.hip Mon Jul 23 18:40:44 2018
@@ -14,14 +14,14 @@
 
 // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" 
 // CHECK-SAME: "-aux-triple" "x86_64--linux-gnu" "-emit-llvm-bc"
-// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-fcuda-is-device"
-// CHECK-SAME: {{.*}} "-o" [[A_BC:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803"
+// CHECK-SAME: "-fcuda-is-device" {{.*}} "-o" [[A_BC:".*bc"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]]
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" "x86_64--linux-gnu" "-emit-llvm-bc"
-// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-fcuda-is-device"
-// CHECK-SAME: {{.*}} "-o" [[B_BC:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx803"
+// CHECK-SAME: "-fcuda-is-device" {{.*}} "-o" [[B_BC:".*bc"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[B_SRC:".*b.hip"]]
 
 // CHECK: [[LLVM_LINK:"*.llvm-link"]] [[A_BC]] [[B_BC]]
@@ -40,14 +40,14 @@
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" 
 // CHECK-SAME: "-aux-triple" "x86_64--linux-gnu" "-emit-llvm-bc"
-// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-fcuda-is-device"
-// CHECK-SAME: {{.*}} "-o" [[A_BC:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900"
+// CHECK-SAME: "-fcuda-is-device" {{.*}} "-o" [[A_BC:".*bc"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[A_SRC]]
 
 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
 // CHECK-SAME: "-aux-triple" "x86_64--linux-gnu" "-emit-llvm-bc"
-// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-fcuda-is-device"
-// CHECK-SAME: {{.*}} "-o" [[B_BC:".*bc"]] "-x" "hip"
+// CHECK-SAME: {{.*}} "-main-file-name" "b.hip" {{.*}} "-target-cpu" "gfx900"
+// CHECK-SAME: "-fcuda-is-device" {{.*}} "-o" [[B_BC:".*bc"]] "-x" "hip"
 // CHECK-SAME: {{.*}} [[B_SRC]]
 
 // CHECK: [[LLVM_LINK]] [[A_BC]] [[B_BC]]


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r337797 - Attempt to fix regression due to r337791

2018-07-23 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Mon Jul 23 19:12:24 2018
New Revision: 337797

URL: http://llvm.org/viewvc/llvm-project?rev=337797&view=rev
Log:
Attempt to fix regression due to r337791

Modified:
cfe/trunk/test/Driver/hip-toolchain.hip

Modified: cfe/trunk/test/Driver/hip-toolchain.hip
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-toolchain.hip?rev=337797&r1=337796&r2=337797&view=diff
==
--- cfe/trunk/test/Driver/hip-toolchain.hip (original)
+++ cfe/trunk/test/Driver/hip-toolchain.hip Mon Jul 23 19:12:24 2018
@@ -80,5 +80,5 @@
 // CHECK-SAME: 
"-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
 // CHECK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" 
"-outputs=[[BUNDLE:.*o]]"
 
-// CHECK: [[LD:".*ld.lld"]] {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]]
+// CHECK: [[LD:".*ld.*"]] {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]]
 // CHECK-SAME: {{.*}} "-T" "{{.*}}.lk"


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r338188 - [CUDA][HIP] Allow function-scope static const variable

2018-07-27 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri Jul 27 20:05:25 2018
New Revision: 338188

URL: http://llvm.org/viewvc/llvm-project?rev=338188&view=rev
Log:
[CUDA][HIP] Allow function-scope static const variable

CUDA 8.0 E.3.9.4 says: Within the body of a __device__ or __global__
function, only __shared__ variables or variables without any device
memory qualifiers may be declared with static storage class.

It is unclear how a function-scope non-const static variable
without device memory qualifier is implemented, therefore only static
const variable without device memory qualifier is allowed, which
can be emitted as a global variable in constant address space.

Currently clang only allows function-scope static variable with
__shared__ qualifier.

This patch also allows function-scope static const variable without
device memory qualifier and emits it as a global variable in constant
address space.

Differential Revision: https://reviews.llvm.org/D49931

Modified:
cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
cfe/trunk/lib/CodeGen/CodeGenModule.cpp
cfe/trunk/lib/Sema/SemaDecl.cpp
cfe/trunk/test/CodeGenCUDA/device-var-init.cu
cfe/trunk/test/SemaCUDA/device-var-init.cu

Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=338188&r1=338187&r2=338188&view=diff
==
--- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Fri Jul 27 20:05:25 
2018
@@ -7129,7 +7129,8 @@ def err_shared_var_init : Error<
 "initialization is not supported for __shared__ variables.">;
 def err_device_static_local_var : Error<
 "within a %select{__device__|__global__|__host__|__host__ __device__}0 "
-"function, only __shared__ variables may be marked 'static'">;
+"function, only __shared__ variables or const variables without device "
+"memory qualifier may be marked 'static'">;
 def err_cuda_vla : Error<
 "cannot use variable-length arrays in "
 "%select{__device__|__global__|__host__|__host__ __device__}0 functions">;

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=338188&r1=338187&r2=338188&view=diff
==
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Fri Jul 27 20:05:25 2018
@@ -3176,6 +3176,10 @@ LangAS CodeGenModule::GetGlobalVarAddres
   return LangAS::cuda_constant;
 else if (D && D->hasAttr<CUDASharedAttr>())
   return LangAS::cuda_shared;
+else if (D && D->hasAttr<CUDADeviceAttr>())
+  return LangAS::cuda_device;
+else if (D && D->getType().isConstQualified())
+  return LangAS::cuda_constant;
 else
   return LangAS::cuda_device;
   }

Modified: cfe/trunk/lib/Sema/SemaDecl.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDecl.cpp?rev=338188&r1=338187&r2=338188&view=diff
==
--- cfe/trunk/lib/Sema/SemaDecl.cpp (original)
+++ cfe/trunk/lib/Sema/SemaDecl.cpp Fri Jul 27 20:05:25 2018
@@ -11914,14 +11914,25 @@ void Sema::FinalizeDeclaration(Decl *Thi
 NewAttr->setInherited(true);
 VD->addAttr(NewAttr);
   }
-  // CUDA E.2.9.4: Within the body of a __device__ or __global__
-  // function, only __shared__ variables may be declared with
-  // static storage class.
-  if (getLangOpts().CUDA && !VD->hasAttr<CUDASharedAttr>() &&
-  CUDADiagIfDeviceCode(VD->getLocation(),
-   diag::err_device_static_local_var)
-  << CurrentCUDATarget())
-VD->setInvalidDecl();
+  // CUDA 8.0 E.3.9.4: Within the body of a __device__ or __global__
+  // function, only __shared__ variables or variables without any device
+  // memory qualifiers may be declared with static storage class.
+  // Note: It is unclear how a function-scope non-const static variable
+  // without device memory qualifier is implemented, therefore only static
+  // const variable without device memory qualifier is allowed.
+  [&]() {
+if (!getLangOpts().CUDA)
+  return;
+if (VD->hasAttr<CUDASharedAttr>())
+  return;
+if (VD->getType().isConstQualified() &&
+!(VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>()))
+  return;
+if (CUDADiagIfDeviceCode(VD->getLocation(),
+ diag::err_device_static_local_var)
+<< CurrentCUDATarget())
+  VD->setInvalidDecl();
+  }();
 }
   }
 

Modified: cfe/trunk/test/CodeGenCUDA/device-var-init.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-var-init.cu?rev=338188&r1=338187&r2=338188&view=diff
==

r338805 - Sema: Fix explicit address space cast involving void pointers

2018-08-02 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu Aug  2 20:18:56 2018
New Revision: 338805

URL: http://llvm.org/viewvc/llvm-project?rev=338805&view=rev
Log:
Sema: Fix explicit address space cast involving void pointers

Explicit cast of a void pointer to a pointer type in different address space is
incorrectly classified as bitcast, which causes invalid bitcast in codegen.

The patch fixes that by checking the address space of the source and destination
types and setting the correct cast kind.

Differential Revision: https://reviews.llvm.org/D50003

Modified:
cfe/trunk/lib/Sema/SemaCast.cpp
cfe/trunk/test/CodeGenCXX/address-space-cast.cpp

Modified: cfe/trunk/lib/Sema/SemaCast.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaCast.cpp?rev=338805&r1=338804&r2=338805&view=diff
==
--- cfe/trunk/lib/Sema/SemaCast.cpp (original)
+++ cfe/trunk/lib/Sema/SemaCast.cpp Thu Aug  2 20:18:56 2018
@@ -1044,6 +1044,17 @@ void CastOperation::CheckStaticCast() {
   }
 }
 
+static bool IsAddressSpaceConversion(QualType SrcType, QualType DestType) {
+  auto *SrcPtrType = SrcType->getAs<PointerType>();
+  if (!SrcPtrType)
+return false;
+  auto *DestPtrType = DestType->getAs<PointerType>();
+  if (!DestPtrType)
+return false;
+  return SrcPtrType->getPointeeType().getAddressSpace() !=
+ DestPtrType->getPointeeType().getAddressSpace();
+}
+
 /// TryStaticCast - Check if a static cast can be performed, and do so if
 /// possible. If @p CStyle, ignore access restrictions on hierarchy casting
 /// and casting away constness.
@@ -1185,7 +1196,9 @@ static TryCastResult TryStaticCast(Sema
   return TC_Failed;
 }
   }
-  Kind = CK_BitCast;
+  Kind = IsAddressSpaceConversion(SrcType, DestType)
+ ? CK_AddressSpaceConversion
+ : CK_BitCast;
   return TC_Success;
 }
 
@@ -1964,12 +1977,6 @@ static bool fixOverloadedReinterpretCast
   return Result.isUsable();
 }
 
-static bool IsAddressSpaceConversion(QualType SrcType, QualType DestType) {
-  return SrcType->isPointerType() && DestType->isPointerType() &&
- SrcType->getAs<PointerType>()->getPointeeType().getAddressSpace() !=
- DestType->getAs<PointerType>()->getPointeeType().getAddressSpace();
-}
-
 static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr,
 QualType DestType, bool CStyle,
 SourceRange OpRange,

Modified: cfe/trunk/test/CodeGenCXX/address-space-cast.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/address-space-cast.cpp?rev=338805&r1=338804&r2=338805&view=diff
==
--- cfe/trunk/test/CodeGenCXX/address-space-cast.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/address-space-cast.cpp Thu Aug  2 20:18:56 2018
@@ -3,13 +3,63 @@
 #define __private__ __attribute__((address_space(5)))
 
 void func_pchar(__private__ char *x);
+void func_pvoid(__private__ void *x);
+void func_pint(__private__ int *x);
 
-void test_cast(char *gen_ptr) {
+void test_cast(char *gen_char_ptr, void *gen_void_ptr, int *gen_int_ptr) {
   // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i8 addrspace(5)*
   // CHECK-NEXT: store i8 addrspace(5)* %[[cast]]
-  __private__ char *priv_ptr = (__private__ char *)gen_ptr;
+  __private__ char *priv_char_ptr = (__private__ char *)gen_char_ptr;
 
   // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i8 addrspace(5)*
+  // CHECK-NEXT: store i8 addrspace(5)* %[[cast]]
+  priv_char_ptr = (__private__ char *)gen_void_ptr;
+
+  // CHECK: %[[cast:.*]] = addrspacecast i32* %{{.*}} to i8 addrspace(5)*
+  // CHECK-NEXT: store i8 addrspace(5)* %[[cast]]
+  priv_char_ptr = (__private__ char *)gen_int_ptr;
+
+  // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i8 addrspace(5)*
+  // CHECK-NEXT: store i8 addrspace(5)* %[[cast]]
+  __private__ void *priv_void_ptr = (__private__ void *)gen_char_ptr;
+
+  // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i8 addrspace(5)*
+  // CHECK-NEXT: store i8 addrspace(5)* %[[cast]]
+  priv_void_ptr = (__private__ void *)gen_void_ptr;
+
+  // CHECK: %[[cast:.*]] = addrspacecast i32* %{{.*}} to i8 addrspace(5)*
+  // CHECK-NEXT: store i8 addrspace(5)* %[[cast]]
+  priv_void_ptr = (__private__ void *)gen_int_ptr;
+
+  // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i32 addrspace(5)*
+  // CHECK-NEXT: store i32 addrspace(5)* %[[cast]]
+  __private__ int *priv_int_ptr = (__private__ int *)gen_void_ptr;
+
+  // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i8 addrspace(5)*
+  // CHECK-NEXT: call void @_Z10func_pcharPU3AS5c(i8 addrspace(5)* %[[cast]])
+  func_pchar((__private__ char *)gen_char_ptr);
+
+  // CHECK: %[[cast:.*]] = addrspacecast i8* %{{.*}} to i8 addrspace(5)*
+  // CHECK-NEXT: call void @_Z10func_pcharPU3AS5c(i8 addrspace(5)* %[[cast]])
+  func_pchar((__private__

r325031 - [AMDGPU] Change constant addr space to 4

2018-02-13 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Feb 13 10:01:21 2018
New Revision: 325031

URL: http://llvm.org/viewvc/llvm-project?rev=325031&view=rev
Log:
[AMDGPU] Change constant addr space to 4

Differential Revision: https://reviews.llvm.org/D43171

Added:
cfe/trunk/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
Removed:
cfe/trunk/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl
Modified:
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
cfe/trunk/test/CodeGen/target-data.c
cfe/trunk/test/CodeGenOpenCL/address-space-constant-initializers.cl
cfe/trunk/test/CodeGenOpenCL/address-spaces.cl
cfe/trunk/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
cfe/trunk/test/CodeGenOpenCL/amdgpu-nullptr.cl
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn.cl
cfe/trunk/test/CodeGenOpenCL/cast_image.cl
cfe/trunk/test/CodeGenOpenCL/opencl_types.cl
cfe/trunk/test/CodeGenOpenCL/private-array-initialization.cl
cfe/trunk/test/CodeGenOpenCL/size_t.cl
cfe/trunk/test/CodeGenOpenCL/vla.cl

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=325031&r1=325030&r2=325031&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Tue Feb 13 10:01:21 2018
@@ -21,9 +21,9 @@
 // SI+ only builtins.
 
//===--===//
 
-BUILTIN(__builtin_amdgcn_dispatch_ptr, "Uc*2", "nc")
-BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "Uc*2", "nc")
-BUILTIN(__builtin_amdgcn_implicitarg_ptr, "Uc*2", "nc")
+BUILTIN(__builtin_amdgcn_dispatch_ptr, "Uc*4", "nc")
+BUILTIN(__builtin_amdgcn_kernarg_segment_ptr, "Uc*4", "nc")
+BUILTIN(__builtin_amdgcn_implicitarg_ptr, "Uc*4", "nc")
 
 BUILTIN(__builtin_amdgcn_workgroup_id_x, "Ui", "nc")
 BUILTIN(__builtin_amdgcn_workgroup_id_y, "Ui", "nc")

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=325031&r1=325030&r2=325031&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Tue Feb 13 10:01:21 2018
@@ -38,7 +38,7 @@ static const char *const DataLayoutStrin
 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
 
 static const char *const DataLayoutStringSIGenericIsZero =
-"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32"
+"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
 
@@ -46,11 +46,11 @@ static const LangASMap AMDGPUPrivIsZeroD
 4, // Default
 1, // opencl_global
 3, // opencl_local
-2, // opencl_constant
+4, // opencl_constant
 0, // opencl_private
 4, // opencl_generic
 1, // cuda_device
-2, // cuda_constant
+4, // cuda_constant
 3  // cuda_shared
 };
 
@@ -58,11 +58,11 @@ static const LangASMap AMDGPUGenIsZeroDe
 0, // Default
 1, // opencl_global
 3, // opencl_local
-2, // opencl_constant
+4, // opencl_constant
 5, // opencl_private
 0, // opencl_generic
 1, // cuda_device
-2, // cuda_constant
+4, // cuda_constant
 3  // cuda_shared
 };
 
@@ -70,11 +70,11 @@ static const LangASMap AMDGPUPrivIsZeroD
 0, // Default
 1, // opencl_global
 3, // opencl_local
-2, // opencl_constant
+4, // opencl_constant
 0, // opencl_private
 4, // opencl_generic
 1, // cuda_device
-2, // cuda_constant
+4, // cuda_constant
 3  // cuda_shared
 };
 
@@ -82,11 +82,11 @@ static const LangASMap AMDGPUGenIsZeroDe
 5, // Default
 1, // opencl_global
 3, // opencl_local
-2, // opencl_constant
+4, // opencl_constant
 5, // opencl_private
 0, // opencl_generic
 1, // cuda_device
-2, // cuda_constant
+4, // cuda_constant
 3  // cuda_shared
 };
 } // namespace targets

Modified: cfe/trunk/test/CodeGen/target-data.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/target-data.c?rev=325031&r1=325030&r2=325031&view=diff
==
--- cfe/trunk/test/CodeGen/target-data.c (original)
+++ cfe/trunk/test/CodeGen/target-data.c Tue Feb 13 10:01:21 2018
@@ -132,12 +132,12 @@
 
 // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm 
%s \
 // RUN: | FileCheck %s -check-prefix=R600SI
-// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
+// R600SI: target datalayout = 
"e-p:64:64-p1:6

r325264 - [OpenCL] Fix __enqueue_block for block with captures

2018-02-15 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu Feb 15 08:39:19 2018
New Revision: 325264

URL: http://llvm.org/viewvc/llvm-project?rev=325264&view=rev
Log:
[OpenCL] Fix __enqueue_block for block with captures

The following test case causes an issue with codegen of __enqueue_block:

void (^block)(void) = ^{ callee(id, out); };

enqueue_kernel(queue, 0, ndrange, block);
Clang first does codegen for the block expression in the first line and
deletes its block info. Clang then tries to do codegen for the same block
expression again for the second line, and fails because the block info is gone.

The fix is to do normal codegen for both lines. Introduce an API in the OpenCL
runtime to record the LLVM block invoke function and the LLVM block literal
emitted for each AST block expression, and use the recorded information for
generating the wrapper kernel.

The EmitBlockLiteral APIs are cleaned up to minimize changes to the normal 
codegen
of blocks.

Another minor issue is that some cleanup AST expressions are generated for
blocks with captures; these can be stripped by IgnoreImplicit.

Differential Revision: https://reviews.llvm.org/D43240

Modified:
cfe/trunk/lib/CodeGen/CGBlocks.cpp
cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp
cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h
cfe/trunk/lib/CodeGen/CodeGenFunction.h
cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBlocks.cpp?rev=325264&r1=325263&r2=325264&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBlocks.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBlocks.cpp Thu Feb 15 08:39:19 2018
@@ -740,27 +740,19 @@ void CodeGenFunction::destroyBlockInfos(
 }
 
 /// Emit a block literal expression in the current function.
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr,
-   llvm::Function **InvokeF) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) {
   // If the block has no captures, we won't have a pre-computed
   // layout for it.
   if (!blockExpr->getBlockDecl()->hasCaptures()) {
 // The block literal is emitted as a global variable, and the block invoke
 // function has to be extracted from its initializer.
 if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) {
-  if (InvokeF) {
-auto *GV = cast(
-cast(Block)->stripPointerCasts());
-auto *BlockInit = cast(GV->getInitializer());
-*InvokeF = cast(
-BlockInit->getAggregateElement(2)->stripPointerCasts());
-  }
   return Block;
 }
 CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName());
 computeBlockInfo(CGM, this, blockInfo);
 blockInfo.BlockExpression = blockExpr;
-return EmitBlockLiteral(blockInfo, InvokeF);
+return EmitBlockLiteral(blockInfo);
   }
 
   // Find the block info for this block and take ownership of it.
@@ -769,11 +761,10 @@ llvm::Value *CodeGenFunction::EmitBlockL
  blockExpr->getBlockDecl()));
 
   blockInfo->BlockExpression = blockExpr;
-  return EmitBlockLiteral(*blockInfo, InvokeF);
+  return EmitBlockLiteral(*blockInfo);
 }
 
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
-   llvm::Function **InvokeF) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
   bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
   auto GenVoidPtrTy =
   IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : 
VoidPtrTy;
@@ -788,8 +779,6 @@ llvm::Value *CodeGenFunction::EmitBlockL
   BlockCGF.SanOpts = SanOpts;
   auto *InvokeFn = BlockCGF.GenerateBlockFunction(
   CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
-  if (InvokeF)
-*InvokeF = InvokeFn;
   auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy);
 
   // If there is nothing to capture, we can emit this as a global block.
@@ -1024,6 +1013,11 @@ llvm::Value *CodeGenFunction::EmitBlockL
   llvm::Value *result = Builder.CreatePointerCast(
   blockAddr.getPointer(), 
ConvertType(blockInfo.getBlockExpr()->getType()));
 
+  if (IsOpenCL) {
+CGM.getOpenCLRuntime().recordBlockInfo(blockInfo.BlockExpression, InvokeFn,
+   result);
+  }
+
   return result;
 }
 
@@ -1287,6 +1281,10 @@ static llvm::Constant *buildGlobalBlock(
   llvm::Constant *Result =
   llvm::ConstantExpr::getPointerCast(literal, RequiredType);
   CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result);
+  if (CGM.getContext().getLangOpts().OpenCL)
+CGM.getOpenCLRuntime().recordBlockInfo(
+blockInfo.BlockExpression,
+cast(blockFn->stripPointerCasts()), Result);
   return Result;
 }

r325279 - Clean up AMDGCN tests

2018-02-15 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu Feb 15 11:12:41 2018
New Revision: 325279

URL: http://llvm.org/viewvc/llvm-project?rev=325279&view=rev
Log:
Clean up AMDGCN tests

Differential Revision: https://reviews.llvm.org/D43340

Modified:
cfe/trunk/test/CodeGen/address-space.c
cfe/trunk/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp
cfe/trunk/test/CodeGenCXX/vla.cpp
cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl
cfe/trunk/test/CodeGenOpenCL/address-space-constant-initializers.cl
cfe/trunk/test/CodeGenOpenCL/address-spaces.cl
cfe/trunk/test/CodeGenOpenCL/blocks.cl
cfe/trunk/test/CodeGenOpenCL/lifetime.cl
cfe/trunk/test/CodeGenOpenCL/vla.cl
cfe/trunk/test/Index/pipe-size.cl

Modified: cfe/trunk/test/CodeGen/address-space.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/address-space.c?rev=325279&r1=325278&r2=325279&view=diff
==
--- cfe/trunk/test/CodeGen/address-space.c (original)
+++ cfe/trunk/test/CodeGen/address-space.c Thu Feb 15 11:12:41 2018
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm < %s | FileCheck 
-check-prefixes=CHECK,X86,GIZ %s
-// RUN: %clang_cc1 -triple amdgcn---amdgiz -emit-llvm < %s | FileCheck 
-check-prefixes=CHECK,AMDGIZ,GIZ %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm < %s | FileCheck 
-check-prefixes=CHECK,X86 %s
+// RUN: %clang_cc1 -triple amdgcn -emit-llvm < %s | FileCheck 
-check-prefixes=CHECK,AMDGCN %s
 
 // CHECK: @foo = common addrspace(1) global
 int foo __attribute__((address_space(1)));
@@ -24,10 +24,10 @@ __attribute__((address_space(2))) int *A
 
 // CHECK-LABEL: define void @test3()
 // X86: load i32 addrspace(2)*, i32 addrspace(2)** @B
-// AMDGIZ: load i32 addrspace(2)*, i32 addrspace(2)** addrspacecast (i32 
addrspace(2)* addrspace(1)* @B to i32 addrspace(2)**)
+// AMDGCN: load i32 addrspace(2)*, i32 addrspace(2)** addrspacecast (i32 
addrspace(2)* addrspace(1)* @B to i32 addrspace(2)**)
 // CHECK: load i32, i32 addrspace(2)*
 // X86: load i32 addrspace(2)*, i32 addrspace(2)** @A
-// AMDGIZ: load i32 addrspace(2)*, i32 addrspace(2)** addrspacecast (i32 
addrspace(2)* addrspace(1)* @A to i32 addrspace(2)**)
+// AMDGCN: load i32 addrspace(2)*, i32 addrspace(2)** addrspacecast (i32 
addrspace(2)* addrspace(1)* @A to i32 addrspace(2)**)
 // CHECK: store i32 {{.*}}, i32 addrspace(2)*
 void test3() {
   *A = *B;
@@ -39,8 +39,8 @@ typedef struct {
 } MyStruct;
 
 // CHECK-LABEL: define void @test4(
-// GIZ: call void @llvm.memcpy.p0i8.p2i8
-// GIZ: call void @llvm.memcpy.p2i8.p0i8
+// CHECK: call void @llvm.memcpy.p0i8.p2i8
+// CHECK: call void @llvm.memcpy.p2i8.p0i8
 void test4(MyStruct __attribute__((address_space(2))) *pPtr) {
   MyStruct s = pPtr[0];
   pPtr[0] = s;

Modified: cfe/trunk/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp?rev=325279&r1=325278&r2=325279&view=diff
==
--- cfe/trunk/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp 
(original)
+++ cfe/trunk/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp Thu Feb 
15 11:12:41 2018
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -std=c++11 -triple x86_64-none-linux-gnu -emit-llvm -o - %s 
| FileCheck -check-prefixes=X86,CHECK %s
-// RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa-amdgiz -DNO_TLS 
-emit-llvm -o - %s | FileCheck -check-prefixes=AMD,CHECK %s
+// RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa -DNO_TLS -emit-llvm -o 
- %s | FileCheck -check-prefixes=AMDGCN,CHECK %s
 
 namespace std {
   typedef decltype(sizeof(int)) size_t;
@@ -49,8 +49,8 @@ struct wantslist1 {
 };
 // X86: @_ZGR15globalInitList1_ = internal constant [3 x i32] [i32 1, i32 2, 
i32 3]
 // X86: @globalInitList1 = global %{{[^ ]+}} { i32* getelementptr inbounds ([3 
x i32], [3 x i32]* @_ZGR15globalInitList1_, i32 0, i32 0), i{{32|64}} 3 }
-// AMD: @_ZGR15globalInitList1_ = internal addrspace(1) constant [3 x i32] 
[i32 1, i32 2, i32 3]
-// AMD: @globalInitList1 = addrspace(1) global %{{[^ ]+}} { i32* addrspacecast 
(i32 addrspace(1)* getelementptr inbounds ([3 x i32], [3 x i32] addrspace(1)* 
@_ZGR15globalInitList1_, i32 0, i32 0) to i32*), i{{32|64}} 3 }
+// AMDGCN: @_ZGR15globalInitList1_ = internal addrspace(1) constant [3 x i32] 
[i32 1, i32 2, i32 3]
+// AMDGCN: @globalInitList1 = addrspace(1) global %{{[^ ]+}} { i32* 
addrspacecast (i32 addrspace(1)* getelementptr inbounds ([3 x i32], [3 x i32] 
addrspace(1)* @_ZGR15globalInitList1_, i32 0, i32 0) to i32*), i{{32|64}} 3 }
 std::initializer_list globalInitList1 = {1, 2, 3};
 
 #ifndef NO_TLS
@@ -67,8 +67,8 @@ std::initializer_list thread_local
 
 // X86: @globalInitList2 = global %{{[^ ]+}} zeroinitializer
 // X86: @_ZGR15globalInitList2_ = internal global [2 x %[[WITHARG:[^ ]*]]] 
zeroinitializer
-// AMD

r305711 - CodeGen: Cast temporary variable to proper address space

2017-06-19 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Mon Jun 19 12:03:41 2017
New Revision: 305711

URL: http://llvm.org/viewvc/llvm-project?rev=305711&view=rev
Log:
CodeGen: Cast temporary variable to proper address space

In C++ all variables are in the default address space. Previously, a change was
made to cast automatic variables to the default address space. However, that is
not sufficient, since all temporary variables need to be cast to the default
address space.

This patch casts all temporary variables to default address space except those
for passing indirect arguments since they are only used for load/store.

This patch only affects targets having a non-zero alloca address space.

Differential Revision: https://reviews.llvm.org/D33706

Modified:
cfe/trunk/lib/CodeGen/CGCall.cpp
cfe/trunk/lib/CodeGen/CGDecl.cpp
cfe/trunk/lib/CodeGen/CGExpr.cpp
cfe/trunk/lib/CodeGen/CodeGenFunction.h
cfe/trunk/test/CodeGen/address-space.c
cfe/trunk/test/CodeGen/default-address-space.c
cfe/trunk/test/CodeGen/x86_64-arguments.c
cfe/trunk/test/CodeGenCXX/amdgcn-automatic-variable.cpp

Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=305711&r1=305710&r2=305711&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Mon Jun 19 12:03:41 2017
@@ -3813,7 +3813,8 @@ RValue CodeGenFunction::EmitCall(const C
   assert(NumIRArgs == 1);
   if (RV.isScalar() || RV.isComplex()) {
 // Make a temporary alloca to pass the argument.
-Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign());
+Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
+ "indirect-arg-temp", false);
 IRCallArgs[FirstIRArg] = Addr.getPointer();
 
 LValue argLV = MakeAddrLValue(Addr, I->Ty);
@@ -3842,7 +3843,8 @@ RValue CodeGenFunction::EmitCall(const C
< Align.getQuantity()) ||
 (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) {
   // Create an aligned temporary, and copy to it.
-  Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign());
+  Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
+ "byval-temp", false);
   IRCallArgs[FirstIRArg] = AI.getPointer();
   EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified());
 } else {

Modified: cfe/trunk/lib/CodeGen/CGDecl.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGDecl.cpp?rev=305711&r1=305710&r2=305711&view=diff
==
--- cfe/trunk/lib/CodeGen/CGDecl.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGDecl.cpp Mon Jun 19 12:03:41 2017
@@ -954,6 +954,7 @@ void CodeGenFunction::EmitLifetimeEnd(ll
 CodeGenFunction::AutoVarEmission
 CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
   QualType Ty = D.getType();
+  assert(Ty.getAddressSpace() == LangAS::Default);
 
   AutoVarEmission emission(D);
 
@@ -1046,8 +1047,7 @@ CodeGenFunction::EmitAutoVarAlloca(const
   // Create the alloca.  Note that we set the name separately from
   // building the instruction so that it's there even in no-asserts
   // builds.
-  address = CreateTempAlloca(allocaTy, allocaAlignment);
-  address.getPointer()->setName(D.getName());
+  address = CreateTempAlloca(allocaTy, allocaAlignment, D.getName());
 
   // Don't emit lifetime markers for MSVC catch parameters. The lifetime of
   // the catch parameter starts in the catchpad instruction, and we can't
@@ -1107,27 +1107,9 @@ CodeGenFunction::EmitAutoVarAlloca(const
 llvm::Type *llvmTy = ConvertTypeForMem(elementType);
 
 // Allocate memory for the array.
-llvm::AllocaInst *vla = Builder.CreateAlloca(llvmTy, elementCount, "vla");
-vla->setAlignment(alignment.getQuantity());
-
-address = Address(vla, alignment);
+address = CreateTempAlloca(llvmTy, alignment, "vla", elementCount);
   }
 
-  // Alloca always returns a pointer in alloca address space, which may
-  // be different from the type defined by the language. For example,
-  // in C++ the auto variables are in the default address space. Therefore
-  // cast alloca to the expected address space when necessary.
-  auto T = D.getType();
-  assert(T.getAddressSpace() == LangAS::Default);
-  if (getASTAllocaAddressSpace() != LangAS::Default) {
-auto *Addr = getTargetHooks().performAddrSpaceCast(
-*this, address.getPointer(), getASTAllocaAddressSpace(),
-T.getAddressSpace(),
-address.getElementType()->getPointerTo(
-getContext().getTargetAddressSpace(T.getAddressSpace())),
-/*non-null*/ true);
-address = Address(Addr, address.getAlignment());
-  }
   setAddrOfLocalVar(&D, address);
   emission.Addr = address;
 

Modified

r326725 - [AMDGPU] Clean up old address space mapping and fix constant address space value

2018-03-05 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Mon Mar  5 09:50:10 2018
New Revision: 326725

URL: http://llvm.org/viewvc/llvm-project?rev=326725&view=rev
Log:
[AMDGPU] Clean up old address space mapping and fix constant address space value

Differential Revision: https://reviews.llvm.org/D43911

Modified:
cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
cfe/trunk/lib/Basic/Targets/AMDGPU.h
cfe/trunk/test/CodeGenCXX/cxx0x-initializer-stdinitializerlist.cpp

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=326725&r1=326724&r2=326725&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Mon Mar  5 09:50:10 2018
@@ -32,62 +32,33 @@ static const char *const DataLayoutStrin
 "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
 
-static const char *const DataLayoutStringSIPrivateIsZero =
-"e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
-"-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
-"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
-
-static const char *const DataLayoutStringSIGenericIsZero =
+static const char *const DataLayoutStringAMDGCN =
 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
 
-static const LangASMap AMDGPUPrivIsZeroDefIsGenMap = {
-4, // Default
-1, // opencl_global
-3, // opencl_local
-4, // opencl_constant
-0, // opencl_private
-4, // opencl_generic
-1, // cuda_device
-4, // cuda_constant
-3  // cuda_shared
-};
-
-static const LangASMap AMDGPUGenIsZeroDefIsGenMap = {
-0, // Default
-1, // opencl_global
-3, // opencl_local
-4, // opencl_constant
-5, // opencl_private
-0, // opencl_generic
-1, // cuda_device
-4, // cuda_constant
-3  // cuda_shared
+const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
+Generic,  // Default
+Global,   // opencl_global
+Local,// opencl_local
+Constant, // opencl_constant
+Private,  // opencl_private
+Generic,  // opencl_generic
+Global,   // cuda_device
+Constant, // cuda_constant
+Local // cuda_shared
 };
 
-static const LangASMap AMDGPUPrivIsZeroDefIsPrivMap = {
-0, // Default
-1, // opencl_global
-3, // opencl_local
-4, // opencl_constant
-0, // opencl_private
-4, // opencl_generic
-1, // cuda_device
-4, // cuda_constant
-3  // cuda_shared
-};
-
-static const LangASMap AMDGPUGenIsZeroDefIsPrivMap = {
-5, // Default
-1, // opencl_global
-3, // opencl_local
-4, // opencl_constant
-5, // opencl_private
-0, // opencl_generic
-1, // cuda_device
-4, // cuda_constant
-3  // cuda_shared
+const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
+Private,  // Default
+Global,   // opencl_global
+Local,// opencl_local
+Constant, // opencl_constant
+Private,  // opencl_private
+Generic,  // opencl_generic
+Global,   // cuda_device
+Constant, // cuda_constant
+Local // cuda_shared
 };
 } // namespace targets
 } // namespace clang
@@ -282,29 +253,18 @@ void AMDGPUTargetInfo::fillValidCPUList(
 }
 
 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
-  if (isGenericZero(getTriple())) {
-AddrSpaceMap = DefaultIsPrivate ? &AMDGPUGenIsZeroDefIsPrivMap
-: &AMDGPUGenIsZeroDefIsGenMap;
-  } else {
-AddrSpaceMap = DefaultIsPrivate ? &AMDGPUPrivIsZeroDefIsPrivMap
-: &AMDGPUPrivIsZeroDefIsGenMap;
-  }
+  AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
 }
 
 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
const TargetOptions &Opts)
-  : TargetInfo(Triple), AS(isGenericZero(Triple)),
-GPU(isAMDGCN(Triple) ? AMDGCNGPUs[0] : parseR600Name(Opts.CPU)) {
-  auto IsGenericZero = isGenericZero(Triple);
-  resetDataLayout(isAMDGCN(getTriple())
-  ? (IsGenericZero ? DataLayoutStringSIGenericIsZero
-   : DataLayoutStringSIPrivateIsZero)
-  : DataLayoutStringR600);
-  assert(DataLayout->getAllocaAddrSpace() == AS.Private);
+: TargetInfo(Triple),
+  GPU(isAMDGCN(Triple) ? AMDGCNGPUs[0] : parseR600Name(Opts.CPU)) {
+  resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
+: DataLayoutStringR600);
+  assert(DataLayout->getAllocaAddrSpace() == Private);
 
   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
- Triple.getEnvironment() == llvm::Triple::OpenCL ||
- Triple.getEnvi

r326937 - [OpenCL] Remove block invoke function from emitted block literal struct

2018-03-07 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Mar  7 11:32:58 2018
New Revision: 326937

URL: http://llvm.org/viewvc/llvm-project?rev=326937&view=rev
Log:
[OpenCL] Remove block invoke function from emitted block literal struct

The OpenCL runtime tracks the invoke function emitted for
any block expression. Due to restrictions on blocks in
OpenCL (v2.0 s6.12.5), it is always possible to know the
block invoke function when emitting a call of a block expression
or the __enqueue_kernel builtin functions. Since __enqueue_kernel
already has an argument for the invoke function, it is redundant
to have an invoke function member in the LLVM block literal structure.

This patch removes the invoke function from the LLVM block literal
structure. It also removes the bitcast of the block invoke function
to the generic block literal type, which is useless for OpenCL.

This will save some space for the kernel argument, and also
eliminate some store instructions.

Differential Revision: https://reviews.llvm.org/D43783

Modified:
cfe/trunk/lib/CodeGen/CGBlocks.cpp
cfe/trunk/lib/CodeGen/CGOpenCLRuntime.cpp
cfe/trunk/lib/CodeGen/CGOpenCLRuntime.h
cfe/trunk/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
cfe/trunk/test/CodeGenOpenCL/blocks.cl
cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Modified: cfe/trunk/lib/CodeGen/CGBlocks.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBlocks.cpp?rev=326937&r1=326936&r2=326937&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBlocks.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBlocks.cpp Wed Mar  7 11:32:58 2018
@@ -307,25 +307,12 @@ static void initializeForBlockHeader(Cod
 
   assert(elementTypes.empty());
   if (CGM.getLangOpts().OpenCL) {
-// The header is basically 'struct { int; int; generic void *;
+// The header is basically 'struct { int; int;
 // custom_fields; }'. Assert that struct is packed.
-auto GenericAS =
-CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic);
-auto GenPtrAlign =
-CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 
8);
-auto GenPtrSize =
-CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 
8);
-assert(CGM.getIntSize() <= GenPtrSize);
-assert(CGM.getIntAlign() <= GenPtrAlign);
-assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign));
 elementTypes.push_back(CGM.IntTy); /* total size */
 elementTypes.push_back(CGM.IntTy); /* align */
-elementTypes.push_back(
-CGM.getOpenCLRuntime()
-.getGenericVoidPointerType()); /* invoke function */
-unsigned Offset =
-2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity();
-unsigned BlockAlign = GenPtrAlign.getQuantity();
+unsigned Offset = 2 * CGM.getIntSize().getQuantity();
+unsigned BlockAlign = CGM.getIntAlign().getQuantity();
 if (auto *Helper =
 CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
   for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ {
@@ -771,20 +758,12 @@ llvm::Value *CodeGenFunction::EmitBlockL
 
 llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
   bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
-  auto GenVoidPtrTy =
-  IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : 
VoidPtrTy;
-  LangAS GenVoidPtrAddr = IsOpenCL ? LangAS::opencl_generic : LangAS::Default;
-  auto GenVoidPtrSize = CharUnits::fromQuantity(
-  CGM.getTarget().getPointerWidth(
-  CGM.getContext().getTargetAddressSpace(GenVoidPtrAddr)) /
-  8);
   // Using the computed layout, generate the actual block function.
   bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda();
   CodeGenFunction BlockCGF{CGM, true};
   BlockCGF.SanOpts = SanOpts;
   auto *InvokeFn = BlockCGF.GenerateBlockFunction(
   CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
-  auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy);
 
   // If there is nothing to capture, we can emit this as a global block.
   if (blockInfo.CanBeGlobal)
@@ -853,11 +832,12 @@ llvm::Value *CodeGenFunction::EmitBlockL
   llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()),
   getIntSize(), "block.align");
 }
-addHeaderField(blockFn, GenVoidPtrSize, "block.invoke");
-if (!IsOpenCL)
+if (!IsOpenCL) {
+  addHeaderField(llvm::ConstantExpr::getBitCast(InvokeFn, VoidPtrTy),
+ getPointerSize(), "block.invoke");
   addHeaderField(descriptor, getPointerSize(), "block.descriptor");
-else if (auto *Helper =
- CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+} else if (auto *Helper =
+   CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
   for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) {
 addHeaderField(
 I.first,
@@

r326946 - CodeGen: Fix address space of indirect function argument

2018-03-07 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Mar  7 13:45:40 2018
New Revision: 326946

URL: http://llvm.org/viewvc/llvm-project?rev=326946&view=rev
Log:
CodeGen: Fix address space of indirect function argument

The indirect function argument is in the alloca address space in LLVM IR.
However, during Clang codegen for C++, the address space of an indirect
function argument should match its address space in the source code, i.e., the
default address space, even for an indirect argument. This is because the
destructor of the indirect argument may be called in the caller function, and
the address of the indirect argument may be taken; in either case the indirect
function argument is expected to be in the default address space, not the
alloca address space.

Therefore, the indirect function argument should be mapped to the temp var
cast to the default address space. The caller will cast it to the alloca
address space when passing it to the callee. In the callee, the argument is
also cast to the default address space and used.

CallArg is refactored to facilitate this fix.

Differential Revision: https://reviews.llvm.org/D34367

Added:
cfe/trunk/test/CodeGenCXX/amdgcn-func-arg.cpp
Modified:
cfe/trunk/lib/CodeGen/CGAtomic.cpp
cfe/trunk/lib/CodeGen/CGCall.cpp
cfe/trunk/lib/CodeGen/CGCall.h
cfe/trunk/lib/CodeGen/CGClass.cpp
cfe/trunk/lib/CodeGen/CGDecl.cpp
cfe/trunk/lib/CodeGen/CGExprCXX.cpp
cfe/trunk/lib/CodeGen/CGGPUBuiltin.cpp
cfe/trunk/lib/CodeGen/CGObjCGNU.cpp
cfe/trunk/lib/CodeGen/CGObjCMac.cpp
cfe/trunk/lib/CodeGen/CodeGenFunction.h
cfe/trunk/lib/CodeGen/ItaniumCXXABI.cpp
cfe/trunk/lib/CodeGen/MicrosoftCXXABI.cpp
cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl
cfe/trunk/test/CodeGenOpenCL/byval.cl

Modified: cfe/trunk/lib/CodeGen/CGAtomic.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGAtomic.cpp?rev=326946&r1=326945&r2=326946&view=diff
==
--- cfe/trunk/lib/CodeGen/CGAtomic.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGAtomic.cpp Wed Mar  7 13:45:40 2018
@@ -1160,7 +1160,7 @@ RValue CodeGenFunction::EmitAtomicExpr(A
 if (UseOptimizedLibcall && Res.getScalarVal()) {
   llvm::Value *ResVal = Res.getScalarVal();
   if (PostOp) {
-llvm::Value *LoadVal1 = Args[1].RV.getScalarVal();
+llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
 ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1);
   }
   if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch)

Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=326946&r1=326945&r2=326946&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Wed Mar  7 13:45:40 2018
@@ -1040,42 +1040,49 @@ void CodeGenFunction::ExpandTypeFromArgs
 }
 
 void CodeGenFunction::ExpandTypeToArgs(
-QualType Ty, RValue RV, llvm::FunctionType *IRFuncTy,
+QualType Ty, CallArg Arg, llvm::FunctionType *IRFuncTy,
 SmallVectorImpl &IRCallArgs, unsigned &IRCallArgPos) {
   auto Exp = getTypeExpansion(Ty, getContext());
   if (auto CAExp = dyn_cast(Exp.get())) {
-forConstantArrayExpansion(*this, CAExp, RV.getAggregateAddress(),
-  [&](Address EltAddr) {
-  RValue EltRV =
-  convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation());
-  ExpandTypeToArgs(CAExp->EltTy, EltRV, IRFuncTy, IRCallArgs, 
IRCallArgPos);
-});
+Address Addr = Arg.hasLValue() ? Arg.getKnownLValue().getAddress()
+   : 
Arg.getKnownRValue().getAggregateAddress();
+forConstantArrayExpansion(
+*this, CAExp, Addr, [&](Address EltAddr) {
+  CallArg EltArg = CallArg(
+  convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation()),
+  CAExp->EltTy);
+  ExpandTypeToArgs(CAExp->EltTy, EltArg, IRFuncTy, IRCallArgs,
+   IRCallArgPos);
+});
   } else if (auto RExp = dyn_cast(Exp.get())) {
-Address This = RV.getAggregateAddress();
+Address This = Arg.hasLValue() ? Arg.getKnownLValue().getAddress()
+   : 
Arg.getKnownRValue().getAggregateAddress();
 for (const CXXBaseSpecifier *BS : RExp->Bases) {
   // Perform a single step derived-to-base conversion.
   Address Base =
   GetAddressOfBaseClass(This, Ty->getAsCXXRecordDecl(), &BS, &BS + 1,
 /*NullCheckValue=*/false, SourceLocation());
-  RValue BaseRV = RValue::getAggregate(Base);
+  CallArg BaseArg = CallArg(RValue::getAggregate(Base), BS->getType());
 
   // Recurse onto bases.
-  ExpandTypeToArgs(BS->getType(), BaseRV, IRFuncTy, IRCallArgs,
+  ExpandTypeToArgs(BS->getType(), BaseArg, IRFuncTy, IRCallArgs,
IRCallArgPos);
 }
 
 L

r327515 - CodeGen: Reduce LValue and CallArgList memory footprint before recommitting r326946

2018-03-14 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Mar 14 08:02:28 2018
New Revision: 327515

URL: http://llvm.org/viewvc/llvm-project?rev=327515&view=rev
Log:
CodeGen: Reduce LValue and CallArgList memory footprint before recommitting 
r326946

The recent change r326946 (https://reviews.llvm.org/D34367) causes a regression
in Eigen due to the increased memory footprint of CallArg.

This patch reduces the LValue size from 112 to 96 bytes and reduces the inline
argument count of CallArgList from 16 to 8.

It has been verified that this will let the added deep AST tree test pass with 
r326946.

In the long run, CallArg or LValue memory footprint should be further optimized.

Differential Revision: https://reviews.llvm.org/D5

Modified:
cfe/trunk/lib/CodeGen/CGCall.h
cfe/trunk/lib/CodeGen/CGValue.h

Modified: cfe/trunk/lib/CodeGen/CGCall.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.h?rev=327515&r1=327514&r2=327515&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCall.h (original)
+++ cfe/trunk/lib/CodeGen/CGCall.h Wed Mar 14 08:02:28 2018
@@ -224,7 +224,7 @@ public:
   /// CallArgList - Type for representing both the value and type of
   /// arguments in a call.
   class CallArgList :
-public SmallVector {
+public SmallVector {
   public:
 CallArgList() : StackBase(nullptr) {}
 

Modified: cfe/trunk/lib/CodeGen/CGValue.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGValue.h?rev=327515&r1=327514&r2=327515&view=diff
==
--- cfe/trunk/lib/CodeGen/CGValue.h (original)
+++ cfe/trunk/lib/CodeGen/CGValue.h Wed Mar 14 08:02:28 2018
@@ -193,7 +193,7 @@ class LValue {
 
   // The alignment to use when accessing this lvalue.  (For vector elements,
   // this is the alignment of the whole vector.)
-  int64_t Alignment;
+  unsigned Alignment;
 
   // objective-c's ivar
   bool Ivar:1;
@@ -215,13 +215,13 @@ class LValue {
   // to make the default bitfield pattern all-zeroes.
   bool ImpreciseLifetime : 1;
 
-  LValueBaseInfo BaseInfo;
-  TBAAAccessInfo TBAAInfo;
-
   // This flag shows if a nontemporal load/stores should be used when accessing
   // this lvalue.
   bool Nontemporal : 1;
 
+  LValueBaseInfo BaseInfo;
+  TBAAAccessInfo TBAAInfo;
+
   Expr *BaseIvarExp;
 
 private:
@@ -231,7 +231,10 @@ private:
"initializing l-value with zero alignment!");
 this->Type = Type;
 this->Quals = Quals;
-this->Alignment = Alignment.getQuantity();
+const unsigned MaxAlign = 1U << 31;
+this->Alignment = Alignment.getQuantity() <= MaxAlign
+  ? Alignment.getQuantity()
+  : MaxAlign;
 assert(this->Alignment == Alignment.getQuantity() &&
"Alignment exceeds allowed max!");
 this->BaseInfo = BaseInfo;


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r327516 - Add deep AST tree test for r327515

2018-03-14 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Mar 14 08:03:31 2018
New Revision: 327516

URL: http://llvm.org/viewvc/llvm-project?rev=327516&view=rev
Log:
Add deep AST tree test for r327515

Added:
cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp

Added: cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp?rev=327516&view=auto
==
--- cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp (added)
+++ cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp Wed Mar 14 08:03:31 2018
@@ -0,0 +1,262 @@
+// RUN: %clang_cc1 %s
+// This test will cause clang to generate a deep AST tree with many CallArgs.
+// This is to make sure there is no stack overflow for such situations.
+// It is based on a use case in Eigen: 
+// https://eigen.tuxfamily.org/dox/group__TutorialAdvancedInitialization.html
+//
+struct VectorBuilder {
+  VectorBuilder &operator,(int);
+};
+void f() {
+  VectorBuilder(),
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+

r327529 - Attempt to fix failure of deep-ast-tree.cpp on ppc64 and atom

2018-03-14 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Mar 14 09:47:49 2018
New Revision: 327529

URL: http://llvm.org/viewvc/llvm-project?rev=327529&view=rev
Log:
Attempt to fix failure of deep-ast-tree.cpp on ppc64 and atom

Modified:
cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp

Modified: cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp?rev=327529&r1=327528&r2=327529&view=diff
==
--- cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp Wed Mar 14 09:47:49 2018
@@ -130,6 +130,8 @@ void f() {
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+  /* some archs have smaller stack size */
+#if !defined(__ppc__) && !defined(__arm__) 
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
@@ -257,6 +259,7 @@ void f() {
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+#endif
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0;
 }
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r327547 - Attempt to fix failure of deep-ast-tree.cpp on atom and s390

2018-03-14 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Mar 14 11:24:38 2018
New Revision: 327547

URL: http://llvm.org/viewvc/llvm-project?rev=327547&view=rev
Log:
Attempt to fix failure of deep-ast-tree.cpp on atom and s390

Modified:
cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp

Modified: cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp?rev=327547&r1=327546&r2=327547&view=diff
==
--- cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp Wed Mar 14 11:24:38 2018
@@ -131,7 +131,7 @@ void f() {
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   /* some archs have smaller stack size */
-#if !defined(__ppc__) && !defined(__arm__) 
+#if !defined(__ppc__) && !defined(__atom__) && !defined(__s390__)
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r327570 - Reduce AST depth for test deep-ast-tree.cpp for atom

2018-03-14 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Mar 14 13:41:05 2018
New Revision: 327570

URL: http://llvm.org/viewvc/llvm-project?rev=327570&view=rev
Log:
Reduce AST depth for test deep-ast-tree.cpp for atom

Modified:
cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp

Modified: cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp?rev=327570&r1=327569&r2=327570&view=diff
==
--- cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp Wed Mar 14 13:41:05 2018
@@ -20,6 +20,8 @@ void f() {
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
+/* some archs have smaller stack size */
+#if !defined(__atom__)
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
@@ -130,8 +132,7 @@ void f() {
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  /* some archs have smaller stack size */
-#if !defined(__ppc__) && !defined(__atom__) && !defined(__s390__)
+#if !defined(__ppc__) && !defined(__s390__)
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
@@ -260,6 +261,7 @@ void f() {
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
 #endif
+#endif
   
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0;
 }
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r327578 - Remove test deep-ast-tree.cpp

2018-03-14 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Mar 14 14:40:55 2018
New Revision: 327578

URL: http://llvm.org/viewvc/llvm-project?rev=327578&view=rev
Log:
Remove test deep-ast-tree.cpp

Since there is no reliable way to adjust the AST depth of this test to the
stack size supported by the test environment, remove this test for now.

Removed:
cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp

Removed: cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp?rev=327577&view=auto
==
--- cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/deep-ast-tree.cpp (removed)
@@ -1,267 +0,0 @@
-// RUN: %clang_cc1 %s
-// This test will cause clang to generate a deep AST tree with many CallArgs.
-// This is to make sure there is no stack overflow for such situations.
-// It is based on a use case in Eigen: 
-// https://eigen.tuxfamily.org/dox/group__TutorialAdvancedInitialization.html
-//
-struct VectorBuilder {
-  VectorBuilder &operator,(int);
-};
-void f() {
-  VectorBuilder(),
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-/* some archs have smaller stack size */
-#if !defined(__atom__)
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,
-  
1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5,6,7,

r327634 - Recommit r326946 after reducing CallArgList memory footprint

2018-03-15 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu Mar 15 08:25:19 2018
New Revision: 327634

URL: http://llvm.org/viewvc/llvm-project?rev=327634&view=rev
Log:
Recommit r326946 after reducing CallArgList memory footprint

Added:
cfe/trunk/test/CodeGenCXX/amdgcn-func-arg.cpp
Modified:
cfe/trunk/lib/CodeGen/CGAtomic.cpp
cfe/trunk/lib/CodeGen/CGCall.cpp
cfe/trunk/lib/CodeGen/CGCall.h
cfe/trunk/lib/CodeGen/CGClass.cpp
cfe/trunk/lib/CodeGen/CGDecl.cpp
cfe/trunk/lib/CodeGen/CGExprCXX.cpp
cfe/trunk/lib/CodeGen/CGGPUBuiltin.cpp
cfe/trunk/lib/CodeGen/CGObjCGNU.cpp
cfe/trunk/lib/CodeGen/CGObjCMac.cpp
cfe/trunk/lib/CodeGen/CodeGenFunction.h
cfe/trunk/lib/CodeGen/ItaniumCXXABI.cpp
cfe/trunk/lib/CodeGen/MicrosoftCXXABI.cpp
cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl
cfe/trunk/test/CodeGenOpenCL/byval.cl

Modified: cfe/trunk/lib/CodeGen/CGAtomic.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGAtomic.cpp?rev=327634&r1=327633&r2=327634&view=diff
==
--- cfe/trunk/lib/CodeGen/CGAtomic.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGAtomic.cpp Thu Mar 15 08:25:19 2018
@@ -1160,7 +1160,7 @@ RValue CodeGenFunction::EmitAtomicExpr(A
 if (UseOptimizedLibcall && Res.getScalarVal()) {
   llvm::Value *ResVal = Res.getScalarVal();
   if (PostOp) {
-llvm::Value *LoadVal1 = Args[1].RV.getScalarVal();
+llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
 ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1);
   }
   if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch)

Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=327634&r1=327633&r2=327634&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Thu Mar 15 08:25:19 2018
@@ -1040,42 +1040,49 @@ void CodeGenFunction::ExpandTypeFromArgs
 }
 
 void CodeGenFunction::ExpandTypeToArgs(
-QualType Ty, RValue RV, llvm::FunctionType *IRFuncTy,
+QualType Ty, CallArg Arg, llvm::FunctionType *IRFuncTy,
 SmallVectorImpl &IRCallArgs, unsigned &IRCallArgPos) {
   auto Exp = getTypeExpansion(Ty, getContext());
   if (auto CAExp = dyn_cast(Exp.get())) {
-forConstantArrayExpansion(*this, CAExp, RV.getAggregateAddress(),
-  [&](Address EltAddr) {
-  RValue EltRV =
-  convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation());
-  ExpandTypeToArgs(CAExp->EltTy, EltRV, IRFuncTy, IRCallArgs, 
IRCallArgPos);
-});
+Address Addr = Arg.hasLValue() ? Arg.getKnownLValue().getAddress()
+   : 
Arg.getKnownRValue().getAggregateAddress();
+forConstantArrayExpansion(
+*this, CAExp, Addr, [&](Address EltAddr) {
+  CallArg EltArg = CallArg(
+  convertTempToRValue(EltAddr, CAExp->EltTy, SourceLocation()),
+  CAExp->EltTy);
+  ExpandTypeToArgs(CAExp->EltTy, EltArg, IRFuncTy, IRCallArgs,
+   IRCallArgPos);
+});
   } else if (auto RExp = dyn_cast(Exp.get())) {
-Address This = RV.getAggregateAddress();
+Address This = Arg.hasLValue() ? Arg.getKnownLValue().getAddress()
+   : 
Arg.getKnownRValue().getAggregateAddress();
 for (const CXXBaseSpecifier *BS : RExp->Bases) {
   // Perform a single step derived-to-base conversion.
   Address Base =
   GetAddressOfBaseClass(This, Ty->getAsCXXRecordDecl(), &BS, &BS + 1,
 /*NullCheckValue=*/false, SourceLocation());
-  RValue BaseRV = RValue::getAggregate(Base);
+  CallArg BaseArg = CallArg(RValue::getAggregate(Base), BS->getType());
 
   // Recurse onto bases.
-  ExpandTypeToArgs(BS->getType(), BaseRV, IRFuncTy, IRCallArgs,
+  ExpandTypeToArgs(BS->getType(), BaseArg, IRFuncTy, IRCallArgs,
IRCallArgPos);
 }
 
 LValue LV = MakeAddrLValue(This, Ty);
 for (auto FD : RExp->Fields) {
-  RValue FldRV = EmitRValueForField(LV, FD, SourceLocation());
-  ExpandTypeToArgs(FD->getType(), FldRV, IRFuncTy, IRCallArgs,
+  CallArg FldArg =
+  CallArg(EmitRValueForField(LV, FD, SourceLocation()), FD->getType());
+  ExpandTypeToArgs(FD->getType(), FldArg, IRFuncTy, IRCallArgs,
IRCallArgPos);
 }
   } else if (isa(Exp.get())) {
-ComplexPairTy CV = RV.getComplexVal();
+ComplexPairTy CV = Arg.getKnownRValue().getComplexVal();
 IRCallArgs[IRCallArgPos++] = CV.first;
 IRCallArgs[IRCallArgPos++] = CV.second;
   } else {
 assert(isa(Exp.get()));
+auto RV = Arg.getKnownRValue();
 assert(RV.isScalar() &&
"Unexpected non-scalar rvalue during struct expansion.");
 
@@ -3417,13 +3424,17 @@ void CodeGenFunction::EmitCall

r334021 - [CUDA][HIP] Do not emit type info when compiling for device

2018-06-05 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Jun  5 08:11:02 2018
New Revision: 334021

URL: http://llvm.org/viewvc/llvm-project?rev=334021&view=rev
Log:
[CUDA][HIP] Do not emit type info when compiling for device

CUDA/HIP does not support RTTI on the device side, therefore there
is no point in emitting type info when compiling for device.

Emitting type info for device not only clutters the IR with useless
global variables, but also causes an undefined symbol at link time,
since the vtable for cxxabiv1::class_type_info has external linkage.

Differential Revision: https://reviews.llvm.org/D47694

Modified:
cfe/trunk/lib/CodeGen/CodeGenModule.cpp
cfe/trunk/test/CodeGenCUDA/device-vtable.cu

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=334021&r1=334020&r2=334021&view=diff
==
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Tue Jun  5 08:11:02 2018
@@ -4900,7 +4900,7 @@ llvm::Constant *CodeGenModule::GetAddrOf
   // Return a bogus pointer if RTTI is disabled, unless it's for EH.
   // FIXME: should we even be calling this method if RTTI is disabled
   // and it's not for EH?
-  if (!ForEH && !getLangOpts().RTTI)
+  if ((!ForEH && !getLangOpts().RTTI) || getLangOpts().CUDAIsDevice)
 return llvm::Constant::getNullValue(Int8PtrTy);
 
   if (ForEH && Ty->isObjCObjectPointerType() &&

Modified: cfe/trunk/test/CodeGenCUDA/device-vtable.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-vtable.cu?rev=334021&r1=334020&r2=334021&view=diff
==
--- cfe/trunk/test/CodeGenCUDA/device-vtable.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/device-vtable.cu Tue Jun  5 08:11:02 2018
@@ -19,7 +19,9 @@ struct H  {
 //CHECK-HOST: @_ZTV1H =
 //CHECK-HOST-SAME: @_ZN1H6methodEv
 //CHECK-DEVICE-NOT: @_ZTV1H =
-
+//CHECK-DEVICE-NOT: @_ZTVN10__cxxabiv117__class_type_infoE
+//CHECK-DEVICE-NOT: @_ZTS1H
+//CHECK-DEVICE-NOT: @_ZTI1H
 struct D  {
__device__ virtual void method();
 };
@@ -27,7 +29,9 @@ struct D  {
 //CHECK-DEVICE: @_ZTV1D
 //CHECK-DEVICE-SAME: @_ZN1D6methodEv
 //CHECK-HOST-NOT: @_ZTV1D
-
+//CHECK-DEVICE-NOT: @_ZTVN10__cxxabiv117__class_type_infoE
+//CHECK-DEVICE-NOT: @_ZTS1D
+//CHECK-DEVICE-NOT: @_ZTI1D
 // This is the case with mixed host and device virtual methods.  It's
 // impossible to emit a valid vtable in that case because only host or
 // only device methods would be available during host or device
@@ -45,6 +49,9 @@ struct HD  {
 // CHECK-HOST-NOT: @_ZN2HD8d_methodEv
 // CHECK-HOST-SAME: null
 // CHECK-BOTH-SAME: ]
+// CHECK-DEVICE-NOT: @_ZTVN10__cxxabiv117__class_type_infoE
+// CHECK-DEVICE-NOT: @_ZTS2HD
+// CHECK-DEVICE-NOT: @_ZTI2HD
 
 void H::method() {}
 //CHECK-HOST: define void @_ZN1H6methodEv


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r334128 - [HIP] Fix unbundling

2018-06-06 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Jun  6 12:44:10 2018
New Revision: 334128

URL: http://llvm.org/viewvc/llvm-project?rev=334128&view=rev
Log:
[HIP] Fix unbundling

HIP uses clang-offload-bundler to bundle intermediate files for host
and different gpu archs together. When a file is unbundled,
clang-offload-bundler should be called only once, and the objects
for host and different gpu archs should be passed to the next
jobs. This is because Driver maintains CachedResults which maps
triple-arch string to output files for each job.

This patch fixes a bug in Driver::BuildJobsForActionNoCache, which
uses an incorrect key for CachedResults for HIP, causing
clang-offload-bundler to be called multiple times and incorrect
output files to be used.

It only affects HIP.

Differential Revision: https://reviews.llvm.org/D47555

Added:
cfe/trunk/test/Driver/hip-binding.hip
Modified:
cfe/trunk/lib/Driver/Driver.cpp

Modified: cfe/trunk/lib/Driver/Driver.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=334128&r1=334127&r2=334128&view=diff
==
--- cfe/trunk/lib/Driver/Driver.cpp (original)
+++ cfe/trunk/lib/Driver/Driver.cpp Wed Jun  6 12:44:10 2018
@@ -2808,7 +2808,7 @@ public:
   C.MakeAction(HostAction);
   UnbundlingHostAction->registerDependentActionInfo(
   C.getSingleOffloadToolChain(),
-  /*BoundArch=*/StringRef(), Action::OFK_Host);
+  /*BoundArch=*/"all", Action::OFK_Host);
   HostAction = UnbundlingHostAction;
 }
 
@@ -3880,9 +3880,18 @@ InputInfo Driver::BuildJobsForActionNoCa
 
   // Get the unique string identifier for this dependence and cache the
   // result.
-  CachedResults[{A, GetTriplePlusArchString(
-UI.DependentToolChain, BoundArch,
-UI.DependentOffloadKind)}] = CurI;
+  StringRef Arch;
+  if (TargetDeviceOffloadKind == Action::OFK_HIP) {
+if (UI.DependentOffloadKind == Action::OFK_Host)
+  Arch = "all";
+else
+  Arch = UI.DependentBoundArch;
+  } else
+Arch = BoundArch;
+
+  CachedResults[{A, GetTriplePlusArchString(UI.DependentToolChain, Arch,
+UI.DependentOffloadKind)}] =
+  CurI;
 }
 
 // Now that we have all the results generated, select the one that should 
be

Added: cfe/trunk/test/Driver/hip-binding.hip
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-binding.hip?rev=334128&view=auto
==
--- cfe/trunk/test/Driver/hip-binding.hip (added)
+++ cfe/trunk/test/Driver/hip-binding.hip Wed Jun  6 12:44:10 2018
@@ -0,0 +1,15 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: touch %t.o
+// RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \
+// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\
+// RUN: 2>&1 | FileCheck %s
+
+// CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN:.*o]]"], 
outputs: ["[[OBJ1:.*o]]", "[[OBJ2:.*o]]", "[[OBJ3:.*o]]"] 
+// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ2]]"], 
output: "[[IMG2:.*out]]"
+// CHECK-NOT: offload bundler
+// CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ3]]"], 
output: "[[IMG3:.*out]]"
+// CHECK-NOT: offload bundler
+// CHECK: # "x86_64--linux-gnu" - "GNU::Linker", inputs: ["[[OBJ1]]", 
"[[IMG2]]", "[[IMG3]]"], output: "a.out"


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r334457 - [CUDA][HIP] Set kernel calling convention before arrange function

2018-06-11 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Mon Jun 11 17:16:33 2018
New Revision: 334457

URL: http://llvm.org/viewvc/llvm-project?rev=334457&view=rev
Log:
[CUDA][HIP] Set kernel calling convention before arrange function

Currently clang sets the kernel calling convention for CUDA/HIP after
arranging the function, which causes an incorrect kernel function type since
it depends on the calling convention.

This patch moves setting the kernel calling convention before arranging the
function.

Differential Revision: https://reviews.llvm.org/D47733

Added:
cfe/trunk/test/CodeGenCUDA/kernel-args.cu
Modified:
cfe/trunk/lib/CodeGen/CGCall.cpp
cfe/trunk/lib/CodeGen/CodeGenModule.cpp
cfe/trunk/lib/CodeGen/TargetInfo.cpp
cfe/trunk/lib/CodeGen/TargetInfo.h

Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=334457&r1=334456&r2=334457&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Mon Jun 11 17:16:33 2018
@@ -255,6 +255,16 @@ CodeGenTypes::arrangeCXXMethodType(const
   FTP->getCanonicalTypeUnqualified().getAs(), MD);
 }
 
+/// Set calling convention for CUDA/HIP kernel.
+static void setCUDAKernelCallingConvention(CanQualType &FTy, CodeGenModule 
&CGM,
+   const FunctionDecl *FD) {
+  if (FD->hasAttr()) {
+const FunctionType *FT = FTy->getAs();
+CGM.getTargetCodeGenInfo().setCUDAKernelCallingConvention(FT);
+FTy = FT->getCanonicalTypeUnqualified();
+  }
+}
+
 /// Arrange the argument and result information for a declaration or
 /// definition of the given C++ non-static member function.  The
 /// member function must be an ordinary function, i.e. not a
@@ -264,7 +274,9 @@ CodeGenTypes::arrangeCXXMethodDeclaratio
   assert(!isa(MD) && "wrong method for constructors!");
   assert(!isa(MD) && "wrong method for destructors!");
 
-  CanQual prototype = GetFormalType(MD);
+  CanQualType FT = GetFormalType(MD).getAs();
+  setCUDAKernelCallingConvention(FT, CGM, MD);
+  auto prototype = FT.getAs();
 
   if (MD->isInstance()) {
 // The abstract case is perfectly fine.
@@ -424,6 +436,7 @@ CodeGenTypes::arrangeFunctionDeclaration
   CanQualType FTy = FD->getType()->getCanonicalTypeUnqualified();
 
   assert(isa(FTy));
+  setCUDAKernelCallingConvention(FTy, CGM, FD);
 
   // When declaring a function without a prototype, always use a
   // non-variadic type.

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=334457&r1=334456&r2=334457&view=diff
==
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Mon Jun 11 17:16:33 2018
@@ -3671,8 +3671,6 @@ void CodeGenModule::EmitGlobalFunctionDe
 
   MaybeHandleStaticInExternC(D, Fn);
 
-  if (D->hasAttr())
-getTargetCodeGenInfo().setCUDAKernelCallingConvention(Fn);
 
   maybeSetTrivialComdat(*D, *Fn);
 

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=334457&r1=334456&r2=334457&view=diff
==
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Mon Jun 11 17:16:33 2018
@@ -7646,7 +7646,7 @@ public:
 llvm::Function *BlockInvokeFunc,
 llvm::Value *BlockLiteral) const override;
   bool shouldEmitStaticExternCAliases() const override;
-  void setCUDAKernelCallingConvention(llvm::Function *F) const override;
+  void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
 };
 }
 
@@ -7783,8 +7783,9 @@ bool AMDGPUTargetCodeGenInfo::shouldEmit
 }
 
 void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
-llvm::Function *F) const {
-  F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+const FunctionType *&FT) const {
+  FT = getABIInfo().getContext().adjustFunctionType(
+  FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
 }
 
 
//===--===//

Modified: cfe/trunk/lib/CodeGen/TargetInfo.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.h?rev=334457&r1=334456&r2=334457&view=diff
==
--- cfe/trunk/lib/CodeGen/TargetInfo.h (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.h Mon Jun 11 17:16:33 2018
@@ -302,7 +302,7 @@ public:
   /// as 'used', and having internal linkage.
   virtual bool shouldEmitStaticExternCAliases() const { return true; }
 
-  virtual void setCUDAKernelCallingConvention(llvm::Function *F) const {}
+  virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {}
 };
 
 } // namespace

r334561 - [CUDA][HIP] Allow CUDA global functions to have amdgpu kernel attributes

2018-06-12 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Jun 12 16:58:59 2018
New Revision: 334561

URL: http://llvm.org/viewvc/llvm-project?rev=334561&view=rev
Log:
[CUDA][HIP] Allow CUDA __global__ functions to have amdgpu kernel attributes

There are HIP applications, e.g. Tensorflow 1.3, that use amdgpu kernel
attributes; however, these attributes are currently only allowed on OpenCL
kernel functions.

This patch will allow amdgpu kernel attributes to be applied to CUDA/HIP 
__global__
functions.

Differential Revision: https://reviews.llvm.org/D47958

Added:
cfe/trunk/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
Modified:
cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
cfe/trunk/lib/Sema/SemaDeclAttr.cpp
cfe/trunk/test/SemaCUDA/amdgpu-attrs.cu
cfe/trunk/test/SemaOpenCL/invalid-kernel-attrs.cl

Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=334561&r1=334560&r2=334561&view=diff
==
--- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Tue Jun 12 16:58:59 
2018
@@ -8435,7 +8435,7 @@ def err_reference_pipe_type : Error <
   "pipes packet types cannot be of reference type">;
 def err_opencl_no_main : Error<"%select{function|kernel}0 cannot be called 
'main'">;
 def err_opencl_kernel_attr :
-  Error<"attribute %0 can only be applied to a kernel function">;
+  Error<"attribute %0 can only be applied to an OpenCL kernel function">;
 def err_opencl_return_value_with_address_space : Error<
   "return value cannot be qualified with address space">;
 def err_opencl_constant_no_init : Error<

Modified: cfe/trunk/lib/Sema/SemaDeclAttr.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclAttr.cpp?rev=334561&r1=334560&r2=334561&view=diff
==
--- cfe/trunk/lib/Sema/SemaDeclAttr.cpp (original)
+++ cfe/trunk/lib/Sema/SemaDeclAttr.cpp Tue Jun 12 16:58:59 2018
@@ -6468,25 +6468,27 @@ void Sema::ProcessDeclAttributeList(Scop
 } else if (const auto *A = D->getAttr()) {
   Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
   D->setInvalidDecl();
-} else if (const auto *A = D->getAttr()) {
-  Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-<< A << ExpectedKernelFunction;
-  D->setInvalidDecl();
-} else if (const auto *A = D->getAttr()) {
-  Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-<< A << ExpectedKernelFunction;
-  D->setInvalidDecl();
-} else if (const auto *A = D->getAttr()) {
-  Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-<< A << ExpectedKernelFunction;
-  D->setInvalidDecl();
-} else if (const auto *A = D->getAttr()) {
-  Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-<< A << ExpectedKernelFunction;
-  D->setInvalidDecl();
 } else if (const auto *A = D->getAttr()) {
   Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
   D->setInvalidDecl();
+} else if (!D->hasAttr()) {
+  if (const auto *A = D->getAttr()) {
+Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+<< A << ExpectedKernelFunction;
+D->setInvalidDecl();
+  } else if (const auto *A = D->getAttr()) {
+Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+<< A << ExpectedKernelFunction;
+D->setInvalidDecl();
+  } else if (const auto *A = D->getAttr()) {
+Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+<< A << ExpectedKernelFunction;
+D->setInvalidDecl();
+  } else if (const auto *A = D->getAttr()) {
+Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+<< A << ExpectedKernelFunction;
+D->setInvalidDecl();
+  }
 }
   }
 }

Added: cfe/trunk/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/amdgpu-kernel-attrs.cu?rev=334561&view=auto
==
--- cfe/trunk/test/CodeGenCUDA/amdgpu-kernel-attrs.cu (added)
+++ cfe/trunk/test/CodeGenCUDA/amdgpu-kernel-attrs.cu Tue Jun 12 16:58:59 2018
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
+// RUN: -fcuda-is-device -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx \
+// RUN: -fcuda-is-device -emit-llvm -o - %s | FileCheck %s \
+// RUN: -check-prefix=NAMD
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN: -verify -o - %s | FileCheck -check-prefix=NAMD %s
+
+#include "Inputs/cuda.h"
+
+__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
+__global__ void flat_work_group_size_32_64() {
+// CHECK: define amdgpu_kernel void @_Z26fl

r334837 - [NFC] Add CreateMemTempWithoutCast and CreateTempAllocaWithoutCast

2018-06-15 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri Jun 15 08:33:22 2018
New Revision: 334837

URL: http://llvm.org/viewvc/llvm-project?rev=334837&view=rev
Log:
[NFC] Add CreateMemTempWithoutCast and CreateTempAllocaWithoutCast

This is partial re-commit of r332982

Modified:
cfe/trunk/lib/CodeGen/CGCall.cpp
cfe/trunk/lib/CodeGen/CGExpr.cpp
cfe/trunk/lib/CodeGen/CodeGenFunction.h

Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=334837&r1=334836&r2=334837&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Fri Jun 15 08:33:22 2018
@@ -3901,9 +3901,8 @@ RValue CodeGenFunction::EmitCall(const C
   assert(NumIRArgs == 1);
   if (!I->isAggregate()) {
 // Make a temporary alloca to pass the argument.
-Address Addr = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
- "indirect-arg-temp", /*Alloca=*/nullptr,
- /*Cast=*/false);
+Address Addr = CreateMemTempWithoutCast(
+I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp");
 IRCallArgs[FirstIRArg] = Addr.getPointer();
 
 I->copyInto(*this, Addr);
@@ -3948,9 +3947,8 @@ RValue CodeGenFunction::EmitCall(const C
 }
 if (NeedCopy) {
   // Create an aligned temporary, and copy to it.
-  Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
- "byval-temp", /*Alloca=*/nullptr,
- /*Cast=*/false);
+  Address AI = CreateMemTempWithoutCast(
+  I->Ty, ArgInfo.getIndirectAlign(), "byval-temp");
   IRCallArgs[FirstIRArg] = AI.getPointer();
   I->copyInto(*this, AI);
 } else {

Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=334837&r1=334836&r2=334837&view=diff
==
--- cfe/trunk/lib/CodeGen/CGExpr.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExpr.cpp Fri Jun 15 08:33:22 2018
@@ -61,21 +61,30 @@ llvm::Value *CodeGenFunction::EmitCastTo
 
 /// CreateTempAlloca - This creates a alloca and inserts it into the entry
 /// block.
+Address CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty,
+ CharUnits Align,
+ const Twine &Name,
+ llvm::Value *ArraySize) {
+  auto Alloca = CreateTempAlloca(Ty, Name, ArraySize);
+  Alloca->setAlignment(Align.getQuantity());
+  return Address(Alloca, Align);
+}
+
+/// CreateTempAlloca - This creates a alloca and inserts it into the entry
+/// block. The alloca is casted to default address space if necessary.
 Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align,
   const Twine &Name,
   llvm::Value *ArraySize,
-  Address *AllocaAddr,
-  bool CastToDefaultAddrSpace) {
-  auto Alloca = CreateTempAlloca(Ty, Name, ArraySize);
-  Alloca->setAlignment(Align.getQuantity());
+  Address *AllocaAddr) {
+  auto Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
   if (AllocaAddr)
-*AllocaAddr = Address(Alloca, Align);
-  llvm::Value *V = Alloca;
+*AllocaAddr = Alloca;
+  llvm::Value *V = Alloca.getPointer();
   // Alloca always returns a pointer in alloca address space, which may
   // be different from the type defined by the language. For example,
   // in C++ the auto variables are in the default address space. Therefore
   // cast alloca to the default address space when necessary.
-  if (CastToDefaultAddrSpace && getASTAllocaAddressSpace() != LangAS::Default) 
{
+  if (getASTAllocaAddressSpace() != LangAS::Default) {
 auto DestAddrSpace = getContext().getTargetAddressSpace(LangAS::Default);
 llvm::IRBuilderBase::InsertPointGuard IPG(Builder);
 // When ArraySize is nullptr, alloca is inserted at AllocaInsertPt,
@@ -128,19 +137,26 @@ Address CodeGenFunction::CreateIRTemp(Qu
 }
 
 Address CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name,
-   Address *Alloca,
-   bool CastToDefaultAddrSpace) {
+   Address *Alloca) {
   // FIXME: Should we prefer the preferred type alignment here?
-  return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name, Alloca,
-   CastToDefaultAddrSpace);
+  return CreateMemTemp(Ty, getContext().getTypeAlignInChars(Ty), Name, Alloca);
 }
 
 Address CodeGenFunction::CreateMemTem

r334879 - Call CreateTempAllocaWithoutCast for ActiveFlag

2018-06-15 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri Jun 15 18:20:52 2018
New Revision: 334879

URL: http://llvm.org/viewvc/llvm-project?rev=334879&view=rev
Log:
Call CreateTempAllocaWithoutCast for ActiveFlag

This is partial re-commit of r332982.

Modified:
cfe/trunk/lib/CodeGen/CGCleanup.cpp

Modified: cfe/trunk/lib/CodeGen/CGCleanup.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCleanup.cpp?rev=334879&r1=334878&r2=334879&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCleanup.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCleanup.cpp Fri Jun 15 18:20:52 2018
@@ -283,8 +283,8 @@ void EHScopeStack::popNullFixups() {
 
 void CodeGenFunction::initFullExprCleanup() {
   // Create a variable to decide whether the cleanup needs to be run.
-  Address active = CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(),
-"cleanup.cond");
+  Address active = CreateTempAllocaWithoutCast(
+  Builder.getInt1Ty(), CharUnits::One(), "cleanup.cond");
 
   // Initialize it to false at a site that's guaranteed to be run
   // before each evaluation.


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r334886 - Add RUN line for amdgcn to lit test conditional-temporaries.cpp

2018-06-16 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Sat Jun 16 05:28:51 2018
New Revision: 334886

URL: http://llvm.org/viewvc/llvm-project?rev=334886&view=rev
Log:
Add RUN line for amdgcn to lit test conditional-temporaries.cpp

This is partial re-commit of r332982.

Modified:
cfe/trunk/test/CodeGenCXX/conditional-temporaries.cpp

Modified: cfe/trunk/test/CodeGenCXX/conditional-temporaries.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/conditional-temporaries.cpp?rev=334886&r1=334885&r2=334886&view=diff
==
--- cfe/trunk/test/CodeGenCXX/conditional-temporaries.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/conditional-temporaries.cpp Sat Jun 16 05:28:51 
2018
@@ -1,4 +1,6 @@
+// REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-apple-darwin9 -O3 | 
FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o - -triple=amdgcn-amd-amdhsa -O3 | 
FileCheck %s
 
 namespace {
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r344057 - [CUDA][HIP] Fix ShouldDeleteSpecialMember for inherited constructors

2018-10-09 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Oct  9 08:53:14 2018
New Revision: 344057

URL: http://llvm.org/viewvc/llvm-project?rev=344057&view=rev
Log:
[CUDA][HIP] Fix ShouldDeleteSpecialMember for inherited constructors

ShouldDeleteSpecialMember is called upon inherited constructors.
It calls inferCUDATargetForImplicitSpecialMember.

Normally the special member enum passed to ShouldDeleteSpecialMember
matches the constructor. However, this is not true when an inherited
constructor is passed, where DefaultConstructor is passed to treat
the inherited constructor as DefaultConstructor. However,
inferCUDATargetForImplicitSpecialMember expects the special
member enum argument to match the constructor, which results
in an assertion failure when this expectation is not satisfied.

This patch checks whether the constructor is inherited. If true it will
get the real special member enum for the constructor and pass it
to inferCUDATargetForImplicitSpecialMember.

Differential Revision: https://reviews.llvm.org/D51809

Added:
cfe/trunk/test/SemaCUDA/implicit-member-target-inherited.cu
cfe/trunk/test/SemaCUDA/inherited-ctor.cu
Modified:
cfe/trunk/lib/Sema/SemaDeclCXX.cpp

Modified: cfe/trunk/lib/Sema/SemaDeclCXX.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclCXX.cpp?rev=344057&r1=344056&r2=344057&view=diff
==
--- cfe/trunk/lib/Sema/SemaDeclCXX.cpp (original)
+++ cfe/trunk/lib/Sema/SemaDeclCXX.cpp Tue Oct  9 08:53:14 2018
@@ -7222,8 +7222,17 @@ bool Sema::ShouldDeleteSpecialMember(CXX
   if (getLangOpts().CUDA) {
 // We should delete the special member in CUDA mode if target inference
 // failed.
-return inferCUDATargetForImplicitSpecialMember(RD, CSM, MD, SMI.ConstArg,
-   Diagnose);
+// For inherited constructors (non-null ICI), CSM may be passed so that MD
+// is treated as certain special member, which may not reflect what special
+// member MD really is. However inferCUDATargetForImplicitSpecialMember
+// expects CSM to match MD, therefore recalculate CSM.
+assert(ICI || CSM == getSpecialMember(MD));
+auto RealCSM = CSM;
+if (ICI)
+  RealCSM = getSpecialMember(MD);
+
+return inferCUDATargetForImplicitSpecialMember(RD, RealCSM, MD,
+   SMI.ConstArg, Diagnose);
   }
 
   return false;

Added: cfe/trunk/test/SemaCUDA/implicit-member-target-inherited.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/implicit-member-target-inherited.cu?rev=344057&view=auto
==
--- cfe/trunk/test/SemaCUDA/implicit-member-target-inherited.cu (added)
+++ cfe/trunk/test/SemaCUDA/implicit-member-target-inherited.cu Tue Oct  9 
08:53:14 2018
@@ -0,0 +1,205 @@
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s 
-Wno-defaulted-function-deleted
+
+#include "Inputs/cuda.h"
+
+//--
+// Test 1: infer inherited default ctor to be host.
+
+struct A1_with_host_ctor {
+  A1_with_host_ctor() {}
+};
+// expected-note@-3 {{candidate constructor (the implicit copy constructor) 
not viable}}
+// expected-note@-4 {{candidate constructor (the implicit move constructor) 
not viable}}
+
+// The inherited default constructor is inferred to be host, so we'll encounter
+// an error when calling it from a __device__ function, but not from a __host__
+// function.
+struct B1_with_implicit_default_ctor : A1_with_host_ctor {
+  using A1_with_host_ctor::A1_with_host_ctor;
+};
+
+// expected-note@-4 {{call to __host__ function from __device__}}
+// expected-note@-5 {{candidate constructor (the implicit copy constructor) 
not viable}}
+// expected-note@-6 {{candidate constructor (the implicit move constructor) 
not viable}}
+// expected-note@-6 2{{constructor from base class 'A1_with_host_ctor' 
inherited here}}
+
+void hostfoo() {
+  B1_with_implicit_default_ctor b;
+}
+
+__device__ void devicefoo() {
+  B1_with_implicit_default_ctor b; // expected-error {{no matching 
constructor}}
+}
+
+//--
+// Test 2: infer inherited default ctor to be device.
+
+struct A2_with_device_ctor {
+  __device__ A2_with_device_ctor() {}
+};
+// expected-note@-3 {{candidate constructor (the implicit copy constructor) 
not viable}}
+// expected-note@-4 {{candidate constructor (the implicit move constructor) 
not viable}}
+
+struct B2_with_implicit_default_ctor : A2_with_device_ctor {
+  using A2_with_device_ctor::A2_with_device_ctor;
+};
+
+// expected-note@-4 {{call to __device__ function from __host__}}
+// expected-note@-5 {{candidate constructor (the implicit copy constructor) 
not viable}}
+// expected-note@-6 {{candidate constructor (the implicit move constructor) 
not viable}}
+// expected-note@-6 2{{constructor from b

r344630 - Disable code object version 3 for HIP toolchain

2018-10-16 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Oct 16 10:36:23 2018
New Revision: 344630

URL: http://llvm.org/viewvc/llvm-project?rev=344630&view=rev
Log:
Disable code object version 3 for HIP toolchain

AMDGPU backend will switch to code object version 3 by default.
Since HIP runtime is not ready, disable it until the runtime is ready.

Differential Revision: https://reviews.llvm.org/D53325

Modified:
cfe/trunk/lib/Driver/ToolChains/HIP.cpp
cfe/trunk/test/Driver/hip-toolchain-no-rdc.hip
cfe/trunk/test/Driver/hip-toolchain-rdc.hip

Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/HIP.cpp?rev=344630&r1=344629&r2=344630&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/HIP.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/HIP.cpp Tue Oct 16 10:36:23 2018
@@ -154,7 +154,7 @@ const char *AMDGCN::Linker::constructLlc
 llvm::StringRef OutputFilePrefix, const char *InputFileName) const {
   // Construct llc command.
   ArgStringList LlcArgs{InputFileName, "-mtriple=amdgcn-amd-amdhsa",
-"-filetype=obj",
+"-filetype=obj", "-mattr=-code-object-v3",
 Args.MakeArgString("-mcpu=" + SubArchName), "-o"};
   std::string LlcOutputFileName =
   C.getDriver().GetTemporaryPath(OutputFilePrefix, "o");

Modified: cfe/trunk/test/Driver/hip-toolchain-no-rdc.hip
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-toolchain-no-rdc.hip?rev=344630&r1=344629&r2=344630&view=diff
==
--- cfe/trunk/test/Driver/hip-toolchain-no-rdc.hip (original)
+++ cfe/trunk/test/Driver/hip-toolchain-no-rdc.hip Tue Oct 16 10:36:23 2018
@@ -32,7 +32,9 @@
 // CHECK-SAME: "-o" [[OPT_BC_DEV_A_803:".*-gfx803-optimized.*bc"]]
 
 // CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_A_803]] "-mtriple=amdgcn-amd-amdhsa"
-// CHECK-SAME: "-filetype=obj" "-mcpu=gfx803" "-o" 
[[OBJ_DEV_A_803:".*-gfx803-.*o"]]
+// CHECK-SAME: "-filetype=obj"
+// CHECK-SAME: "-mattr=-code-object-v3"
+// CHECK-SAME: "-mcpu=gfx803" "-o" [[OBJ_DEV_A_803:".*-gfx803-.*o"]]
 
 // CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared"
 // CHECK-SAME: "-o" "[[IMG_DEV_A_803:.*out]]" [[OBJ_DEV_A_803]]
@@ -57,7 +59,9 @@
 // CHECK-SAME: "-o" [[OPT_BC_DEV_A_900:".*-gfx900-optimized.*bc"]]
 
 // CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_A_900]] "-mtriple=amdgcn-amd-amdhsa"
-// CHECK-SAME: "-filetype=obj" "-mcpu=gfx900" "-o" 
[[OBJ_DEV_A_900:".*-gfx900-.*o"]]
+// CHECK-SAME: "-filetype=obj"
+// CHECK-SAME: "-mattr=-code-object-v3"
+// CHECK-SAME: "-mcpu=gfx900" "-o" [[OBJ_DEV_A_900:".*-gfx900-.*o"]]
 
 // CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared"
 // CHECK-SAME: "-o" "[[IMG_DEV_A_900:.*out]]" [[OBJ_DEV_A_900]]
@@ -97,7 +101,9 @@
 // CHECK-SAME: "-o" [[OPT_BC_DEV_B_803:".*-gfx803-optimized.*bc"]]
 
 // CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_B_803]] "-mtriple=amdgcn-amd-amdhsa"
-// CHECK-SAME: "-filetype=obj" "-mcpu=gfx803" "-o" 
[[OBJ_DEV_B_803:".*-gfx803-.*o"]]
+// CHECK-SAME: "-filetype=obj"
+// CHECK-SAME: "-mattr=-code-object-v3"
+// CHECK-SAME: "-mcpu=gfx803" "-o" [[OBJ_DEV_B_803:".*-gfx803-.*o"]]
 
 // CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared"
 // CHECK-SAME: "-o" "[[IMG_DEV_B_803:.*out]]" [[OBJ_DEV_B_803]]
@@ -122,7 +128,9 @@
 // CHECK-SAME: "-o" [[OPT_BC_DEV_B_900:".*-gfx900-optimized.*bc"]]
 
 // CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_B_900]] "-mtriple=amdgcn-amd-amdhsa"
-// CHECK-SAME: "-filetype=obj" "-mcpu=gfx900" "-o" 
[[OBJ_DEV_B_900:".*-gfx900-.*o"]]
+// CHECK-SAME: "-filetype=obj"
+// CHECK-SAME: "-mattr=-code-object-v3"
+// CHECK-SAME: "-mcpu=gfx900" "-o" [[OBJ_DEV_B_900:".*-gfx900-.*o"]]
 
 // CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared"
 // CHECK-SAME: "-o" "[[IMG_DEV_B_900:.*out]]" [[OBJ_DEV_B_900]]

Modified: cfe/trunk/test/Driver/hip-toolchain-rdc.hip
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-toolchain-rdc.hip?rev=344630&r1=344629&r2=344630&view=diff
==
--- cfe/trunk/test/Driver/hip-toolchain-rdc.hip (original)
+++ cfe/trunk/test/Driver/hip-toolchain-rdc.hip Tue Oct 16 10:36:23 2018
@@ -35,7 +35,9 @@
 // CHECK-SAME: "-o" [[OPT_BC_DEV1:".*-gfx803-optimized.*bc"]]
 
 // CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV1]] "-mtriple=amdgcn-amd-amdhsa"
-// CHECK-SAME: "-filetype=obj" "-mcpu=gfx803" "-o" [[OBJ_DEV1:".*-gfx803-.*o"]]
+// CHECK-SAME: "-filetype=obj"
+// CHECK-SAME: "-mattr=-code-object-v3"
+// CHECK-SAME: "-mcpu=gfx803" "-o" [[OBJ_DEV1:".*-gfx803-.*o"]]
 
 // CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared"
 // CHECK-SAME: "-o" "[[IMG_DEV1:.*out]]" [[OBJ_DEV1]]
@@ -61,7 +63,9 @@
 // CHECK-SAME: "-o" [[OPT_BC_DEV2:".*-gfx900-optimized.*bc"]]
 
 // CHECK: [[LLC]] [[OPT_BC_DEV2]] "-mtriple=amdgcn-am

r344665 - AMDGPU: add __builtin_amdgcn_update_dpp

2018-10-16 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Oct 16 19:32:26 2018
New Revision: 344665

URL: http://llvm.org/viewvc/llvm-project?rev=344665&view=rev
Log:
AMDGPU: add __builtin_amdgcn_update_dpp

Emit llvm.amdgcn.update.dpp for both __builtin_amdgcn_mov_dpp and
__builtin_amdgcn_update_dpp. The first argument to
llvm.amdgcn.update.dpp will be undef for __builtin_amdgcn_mov_dpp.

Differential Revision: https://reviews.llvm.org/D52320

Modified:
cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error.cl

Modified: cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def?rev=344665&r1=344664&r2=344665&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAMDGPU.def Tue Oct 16 19:32:26 2018
@@ -122,6 +122,7 @@ TARGET_BUILTIN(__builtin_amdgcn_fracth,
 TARGET_BUILTIN(__builtin_amdgcn_classh, "bhi", "nc", "16-bit-insts")
 TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime")
 TARGET_BUILTIN(__builtin_amdgcn_mov_dpp, "iiIiIiIiIb", "nc", "dpp")
+TARGET_BUILTIN(__builtin_amdgcn_update_dpp, "iiiIiIiIiIb", "nc", "dpp")
 TARGET_BUILTIN(__builtin_amdgcn_s_dcache_wb, "v", "n", "vi-insts")
 
 
//===--===//

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=344665&r1=344664&r2=344665&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Oct 16 19:32:26 2018
@@ -11347,12 +11347,16 @@ Value *CodeGenFunction::EmitAMDGPUBuilti
 
   case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
 return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
-  case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
-llvm::SmallVector Args;
-for (unsigned I = 0; I != 5; ++I)
+  case AMDGPU::BI__builtin_amdgcn_mov_dpp:
+  case AMDGPU::BI__builtin_amdgcn_update_dpp: {
+llvm::SmallVector Args;
+for (unsigned I = 0; I != E->getNumArgs(); ++I)
   Args.push_back(EmitScalarExpr(E->getArg(I)));
-Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
-Args[0]->getType());
+assert(Args.size() == 5 || Args.size() == 6);
+if (Args.size() == 5)
+  Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType()));
+Value *F =
+CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
 return Builder.CreateCall(F, Args);
   }
   case AMDGPU::BI__builtin_amdgcn_div_fixup:

Modified: cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl?rev=344665&r1=344664&r2=344665&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/builtins-amdgcn-vi.cl Tue Oct 16 19:32:26 2018
@@ -90,12 +90,19 @@ void test_s_dcache_wb()
 }
 
 // CHECK-LABEL: @test_mov_dpp
-// CHECK: call i32 @llvm.amdgcn.mov.dpp.i32(i32 %src, i32 0, i32 0, i32 0, i1 
false)
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %src, i32 0, i32 
0, i32 0, i1 false)
 void test_mov_dpp(global int* out, int src)
 {
   *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
 }
 
+// CHECK-LABEL: @test_update_dpp
+// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, 
i32 0, i32 0, i1 false)
+void test_update_dpp(global int* out, int arg1, int arg2)
+{
+  *out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
+}
+
 // CHECK-LABEL: @test_ds_fadd
 // CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float 
%src, i32 0, i32 0, i1 false)
 void test_ds_faddf(local float *out, float src) {

Modified: cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error.cl?rev=344665&r1=344664&r2=344665&view=diff
==
--- cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error.cl (original)
+++ cfe/trunk/test/SemaOpenCL/builtins-amdgcn-error.cl Tue Oct 16 19:32:26 2018
@@ -102,6 +102,15 @@ void test_mov_dpp2(global int* out, int
   *out = __builtin_amdgcn_mov_dpp(a, 0, 0, 0, e); // expected-error {{argument 
to '__builtin_amdgcn_mov_dpp' must be a constant integer}}
 }
 
+void test_update_dpp2(global int* out, int a, int b, int c, int d, int e, bool 
f)
+{
+  *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, false);
+  *out = __builtin_amdgcn_update_dpp(a, 0, c, 0, 0, false); // expected-er

r344996 - Add gfx904 and gfx906 to GPU Arch

2018-10-22 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Mon Oct 22 19:05:31 2018
New Revision: 344996

URL: http://llvm.org/viewvc/llvm-project?rev=344996&view=rev
Log:
Add gfx904 and gfx906 to GPU Arch

Differential Revision: https://reviews.llvm.org/D53472

Modified:
cfe/trunk/include/clang/Basic/Cuda.h
cfe/trunk/lib/Basic/Cuda.cpp
cfe/trunk/lib/Basic/Targets/NVPTX.cpp

Modified: cfe/trunk/include/clang/Basic/Cuda.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Cuda.h?rev=344996&r1=344995&r2=344996&view=diff
==
--- cfe/trunk/include/clang/Basic/Cuda.h (original)
+++ cfe/trunk/include/clang/Basic/Cuda.h Mon Oct 22 19:05:31 2018
@@ -62,6 +62,8 @@ enum class CudaArch {
   GFX810,
   GFX900,
   GFX902,
+  GFX904,
+  GFX906,
   LAST,
 };
 const char *CudaArchToString(CudaArch A);

Modified: cfe/trunk/lib/Basic/Cuda.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Cuda.cpp?rev=344996&r1=344995&r2=344996&view=diff
==
--- cfe/trunk/lib/Basic/Cuda.cpp (original)
+++ cfe/trunk/lib/Basic/Cuda.cpp Mon Oct 22 19:05:31 2018
@@ -90,6 +90,10 @@ const char *CudaArchToString(CudaArch A)
 return "gfx900";
   case CudaArch::GFX902: // TBA
 return "gfx902";
+  case CudaArch::GFX904: // TBA
+return "gfx904";
+  case CudaArch::GFX906: // TBA
+return "gfx906";
   }
   llvm_unreachable("invalid enum");
 }
@@ -124,6 +128,8 @@ CudaArch StringToCudaArch(llvm::StringRe
   .Case("gfx810", CudaArch::GFX810)
   .Case("gfx900", CudaArch::GFX900)
   .Case("gfx902", CudaArch::GFX902)
+  .Case("gfx904", CudaArch::GFX904)
+  .Case("gfx906", CudaArch::GFX906)
   .Default(CudaArch::UNKNOWN);
 }
 
@@ -233,6 +239,8 @@ CudaVirtualArch VirtualArchForCudaArch(C
   case CudaArch::GFX810:
   case CudaArch::GFX900:
   case CudaArch::GFX902:
+  case CudaArch::GFX904:
+  case CudaArch::GFX906:
 return CudaVirtualArch::COMPUTE_AMDGCN;
   }
   llvm_unreachable("invalid enum");
@@ -277,6 +285,8 @@ CudaVersion MinVersionForCudaArch(CudaAr
   case CudaArch::GFX810:
   case CudaArch::GFX900:
   case CudaArch::GFX902:
+  case CudaArch::GFX904:
+  case CudaArch::GFX906:
 return CudaVersion::CUDA_70;
   }
   llvm_unreachable("invalid enum");

Modified: cfe/trunk/lib/Basic/Targets/NVPTX.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/NVPTX.cpp?rev=344996&r1=344995&r2=344996&view=diff
==
--- cfe/trunk/lib/Basic/Targets/NVPTX.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/NVPTX.cpp Mon Oct 22 19:05:31 2018
@@ -188,6 +188,8 @@ void NVPTXTargetInfo::getTargetDefines(c
   case CudaArch::GFX810:
   case CudaArch::GFX900:
   case CudaArch::GFX902:
+  case CudaArch::GFX904:
+  case CudaArch::GFX906:
   case CudaArch::LAST:
 break;
   case CudaArch::UNKNOWN:


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r352488 - [CUDA][HIP] Do not diagnose use of _Float16

2019-01-29 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Jan 29 05:20:23 2019
New Revision: 352488

URL: http://llvm.org/viewvc/llvm-project?rev=352488&view=rev
Log:
[CUDA][HIP] Do not diagnose use of _Float16

r352221 caused regressions in CUDA/HIP since device functions may use _Float16
whereas the host does not support it.
In this case host compilation should not diagnose usage of _Float16 in device
functions or variables.

For now, just do not diagnose _Float16 for CUDA/HIP. In the future we should
have a more precise check.

Differential Revision: https://reviews.llvm.org/D57369

Added:
cfe/trunk/test/SemaCUDA/float16.cu
Modified:
cfe/trunk/lib/Lex/LiteralSupport.cpp
cfe/trunk/lib/Sema/SemaType.cpp

Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=352488&r1=352487&r2=352488&view=diff
==
--- cfe/trunk/lib/Lex/LiteralSupport.cpp (original)
+++ cfe/trunk/lib/Lex/LiteralSupport.cpp Tue Jan 29 05:20:23 2019
@@ -616,8 +616,11 @@ NumericLiteralParser::NumericLiteralPars
   if (isHalf || isFloat || isLong || isFloat128)
 break; // HF, FF, LF, QF invalid.
 
-  if (PP.getTargetInfo().hasFloat16Type() && s + 2 < ThisTokEnd &&
-  s[1] == '1' && s[2] == '6') {
+  // CUDA host and device may have different _Float16 support, therefore
+  // allows f16 literals to avoid false alarm.
+  // ToDo: more precise check for CUDA.
+  if ((PP.getTargetInfo().hasFloat16Type() || PP.getLangOpts().CUDA) &&
+  s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') {
 s += 2; // success, eat up 2 characters.
 isFloat16 = true;
 continue;

Modified: cfe/trunk/lib/Sema/SemaType.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaType.cpp?rev=352488&r1=352487&r2=352488&view=diff
==
--- cfe/trunk/lib/Sema/SemaType.cpp (original)
+++ cfe/trunk/lib/Sema/SemaType.cpp Tue Jan 29 05:20:23 2019
@@ -1442,7 +1442,10 @@ static QualType ConvertDeclSpecToType(Ty
   Result = Context.Int128Ty;
 break;
   case DeclSpec::TST_float16:
-if (!S.Context.getTargetInfo().hasFloat16Type())
+// CUDA host and device may have different _Float16 support, therefore
+// do not diagnose _Float16 usage to avoid false alarm.
+// ToDo: more precise diagnostics for CUDA.
+if (!S.Context.getTargetInfo().hasFloat16Type() && !S.getLangOpts().CUDA)
   S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
 << "_Float16";
 Result = Context.Float16Ty;

Added: cfe/trunk/test/SemaCUDA/float16.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/float16.cu?rev=352488&view=auto
==
--- cfe/trunk/test/SemaCUDA/float16.cu (added)
+++ cfe/trunk/test/SemaCUDA/float16.cu Tue Jan 29 05:20:23 2019
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify %s
+// expected-no-diagnostics
+#include "Inputs/cuda.h"
+
+__device__ void f(_Float16 x);
+
+__device__ _Float16 x = 1.0f16;


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r352620 - [HIP] Fix size_t for MSVC environment

2019-01-30 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Jan 30 04:26:54 2019
New Revision: 352620

URL: http://llvm.org/viewvc/llvm-project?rev=352620&view=rev
Log:
[HIP] Fix size_t for MSVC environment

In a 64-bit MSVC environment, size_t is defined as unsigned long long.
In a single-source language like HIP, data layout should be consistent
between device and host compilation; therefore, copy the fields that control
data layout from the Aux target for the AMDGPU target.

Differential Revision: https://reviews.llvm.org/D56318

Added:
cfe/trunk/test/SemaCUDA/amdgpu-size_t.cu
Modified:
cfe/trunk/include/clang/Basic/TargetInfo.h
cfe/trunk/lib/Basic/TargetInfo.cpp
cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
cfe/trunk/lib/Basic/Targets/AMDGPU.h
cfe/trunk/lib/Frontend/CompilerInstance.cpp

Modified: cfe/trunk/include/clang/Basic/TargetInfo.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/TargetInfo.h?rev=352620&r1=352619&r2=352620&view=diff
==
--- cfe/trunk/include/clang/Basic/TargetInfo.h (original)
+++ cfe/trunk/include/clang/Basic/TargetInfo.h Wed Jan 30 04:26:54 2019
@@ -48,22 +48,10 @@ class SourceManager;
 
 namespace Builtin { struct Info; }
 
-/// Exposes information about the current target.
-///
-class TargetInfo : public RefCountedBase<TargetInfo> {
-  std::shared_ptr<TargetOptions> TargetOpts;
-  llvm::Triple Triple;
-protected:
-  // Target values set by the ctor of the actual target implementation.  Default
-  // values are specified by the TargetInfo constructor.
-  bool BigEndian;
-  bool TLSSupported;
-  bool VLASupported;
-  bool NoAsmVariants;  // True if {|} are normal characters.
-  bool HasLegalHalfType; // True if the backend supports operations on the half
- // LLVM IR type.
-  bool HasFloat128;
-  bool HasFloat16;
+/// Fields controlling how types are laid out in memory; these may need to
+/// be copied for targets like AMDGPU that base their ABIs on an auxiliary
+/// CPU target.
+struct TransferrableTargetInfo {
   unsigned char PointerWidth, PointerAlign;
   unsigned char BoolWidth, BoolAlign;
   unsigned char IntWidth, IntAlign;
@@ -104,15 +92,92 @@ protected:
   unsigned char SuitableAlign;
   unsigned char DefaultAlignForAttributeAligned;
   unsigned char MinGlobalAlign;
-  unsigned char MaxAtomicPromoteWidth, MaxAtomicInlineWidth;
+
+  unsigned short NewAlign;
   unsigned short MaxVectorAlign;
   unsigned short MaxTLSAlign;
+
+  const llvm::fltSemantics *HalfFormat, *FloatFormat, *DoubleFormat,
+*LongDoubleFormat, *Float128Format;
+
+  ///=== Target Data Type Query Methods 
---===//
+  enum IntType {
+NoInt = 0,
+SignedChar,
+UnsignedChar,
+SignedShort,
+UnsignedShort,
+SignedInt,
+UnsignedInt,
+SignedLong,
+UnsignedLong,
+SignedLongLong,
+UnsignedLongLong
+  };
+
+  enum RealType {
+NoFloat = 255,
+Float = 0,
+Double,
+LongDouble,
+Float128
+  };
+protected:
+  IntType SizeType, IntMaxType, PtrDiffType, IntPtrType, WCharType,
+  WIntType, Char16Type, Char32Type, Int64Type, SigAtomicType,
+  ProcessIDType;
+
+  /// Whether Objective-C's built-in boolean type should be signed char.
+  ///
+  /// Otherwise, when this flag is not set, the normal built-in boolean type is
+  /// used.
+  unsigned UseSignedCharForObjCBool : 1;
+
+  /// Control whether the alignment of bit-field types is respected when laying
+  /// out structures. If true, then the alignment of the bit-field type will be
+  /// used to (a) impact the alignment of the containing structure, and (b)
+  /// ensure that the individual bit-field will not straddle an alignment
+  /// boundary.
+  unsigned UseBitFieldTypeAlignment : 1;
+
+  /// Whether zero length bitfields (e.g., int : 0;) force alignment of
+  /// the next bitfield.
+  ///
+  /// If the alignment of the zero length bitfield is greater than the member
+  /// that follows it, `bar', `bar' will be aligned as the type of the
+  /// zero-length bitfield.
+  unsigned UseZeroLengthBitfieldAlignment : 1;
+
+  ///  Whether explicit bit field alignment attributes are honored.
+  unsigned UseExplicitBitFieldAlignment : 1;
+
+  /// If non-zero, specifies a fixed alignment value for bitfields that follow
+  /// zero length bitfield, regardless of the zero length bitfield type.
+  unsigned ZeroLengthBitfieldBoundary;
+};
+
+/// Exposes information about the current target.
+///
+class TargetInfo : public virtual TransferrableTargetInfo,
+   public RefCountedBase<TargetInfo> {
+  std::shared_ptr<TargetOptions> TargetOpts;
+  llvm::Triple Triple;
+protected:
+  // Target values set by the ctor of the actual target implementation.  Default
+  // values are specified by the TargetInfo constructor.
+  bool BigEndian;
+  bool TLSSupported;
+  bool VLASupported;
+  bool NoAsmVariants;  // True if {|} are normal characters.
+  bool HasLegalHalfType; // True if the backend supports operations on the half
+

r346413 - Fix bitcast to address space cast for coerced load/stores

2018-11-08 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu Nov  8 08:55:46 2018
New Revision: 346413

URL: http://llvm.org/viewvc/llvm-project?rev=346413&view=rev
Log:
Fix bitcast to address space cast for coerced load/stores 

Coerced loads/stores through memory do not take into account potential
address space differences when creating their bitcasts.

Patch by David Salinas.

Differential Revision: https://reviews.llvm.org/D53780

Added:
cfe/trunk/test/CodeGenCXX/address-space-cast-coerce.cpp
Modified:
cfe/trunk/lib/CodeGen/CGCall.cpp

Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=346413&r1=346412&r2=346413&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Thu Nov  8 08:55:46 2018
@@ -1253,8 +1253,8 @@ static llvm::Value *CreateCoercedLoad(Ad
 
   // Otherwise do coercion through memory. This is stupid, but simple.
   Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment());
-  Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy);
-  Address SrcCasted = CGF.Builder.CreateBitCast(Src, CGF.AllocaInt8PtrTy);
+  Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty);
+  Address SrcCasted = CGF.Builder.CreateElementBitCast(Src,CGF.Int8Ty);
   CGF.Builder.CreateMemCpy(Casted, SrcCasted,
   llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize),
   false);
@@ -1335,8 +1335,8 @@ static void CreateCoercedStore(llvm::Val
 // to that information.
 Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
 CGF.Builder.CreateStore(Src, Tmp);
-Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy);
-Address DstCasted = CGF.Builder.CreateBitCast(Dst, CGF.AllocaInt8PtrTy);
+Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty);
+Address DstCasted = CGF.Builder.CreateElementBitCast(Dst,CGF.Int8Ty);
 CGF.Builder.CreateMemCpy(DstCasted, Casted,
 llvm::ConstantInt::get(CGF.IntPtrTy, DstSize),
 false);

Added: cfe/trunk/test/CodeGenCXX/address-space-cast-coerce.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/address-space-cast-coerce.cpp?rev=346413&view=auto
==
--- cfe/trunk/test/CodeGenCXX/address-space-cast-coerce.cpp (added)
+++ cfe/trunk/test/CodeGenCXX/address-space-cast-coerce.cpp Thu Nov  8 08:55:46 
2018
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 %s -triple=amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s
+
+template<typename T, unsigned int n> struct my_vector_base;
+
+template<typename T>
+struct my_vector_base<T, 1> {
+typedef T Native_vec_ __attribute__((ext_vector_type(1)));
+
+union {
+Native_vec_ data;
+struct {
+T x;
+};
+};
+};
+
+template<typename T, unsigned int rank>
+struct my_vector_type : public my_vector_base<T, rank> {
+using my_vector_base<T, rank>::data;
+using typename my_vector_base<T, rank>::Native_vec_;
+
+template< typename U>
+my_vector_type(U x) noexcept
+{
+for (auto i = 0u; i != rank; ++i) data[i] = x;
+}
+my_vector_type& operator+=(const my_vector_type& x) noexcept
+{
+data += x.data;
+return *this;
+}
+};
+
+template<typename T, unsigned int n>
+inline
+my_vector_type<T, n> operator+(
+const my_vector_type<T, n>& x, const my_vector_type<T, n>& y) noexcept
+{
+return my_vector_type<T, n>{x} += y;
+}
+
+using char1 = my_vector_type<char, 1>;
+
+int mane() {
+
+char1 f1{1};
+char1 f2{1};
+
+// CHECK: %[[a:[^ ]+]] = addrspacecast i16 addrspace(5)* %{{[^ ]+}} to i16*
+// CHECK: %[[a:[^ ]+]] = addrspacecast %{{[^ ]+}} addrspace(5)* %{{[^ ]+}} to 
%{{[^ ]+}} 
+
+char1 f3 = f1 + f2;
+}


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r346536 - [HIP] Remove useless sections in linked files

2018-11-09 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri Nov  9 10:52:05 2018
New Revision: 346536

URL: http://llvm.org/viewvc/llvm-project?rev=346536&view=rev
Log:
[HIP] Remove useless sections in linked files

clang-offload-bundler creates __CLANG_OFFLOAD_BUNDLE__* sections in the bundles,
which get into the linked files. These sections are useless after linking. They
waste disk space and cause confusion for clang when directly linked with other
object files; therefore they should be removed.

Differential Revision: https://reviews.llvm.org/D54275

Modified:
cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp

Modified: cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp?rev=346536&r1=346535&r2=346536&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp Fri Nov  9 10:52:05 2018
@@ -1436,6 +1436,10 @@ void tools::AddHIPLinkerScript(const Too
   LksStream << "PROVIDE_HIDDEN(__hip_fatbin = .);\n";
   LksStream << "" << BundleFileName << "\n";
   LksStream << "  }\n";
+  LksStream << "  /DISCARD/ :\n";
+  LksStream << "  {\n";
+  LksStream << "* ( __CLANG_OFFLOAD_BUNDLE__* )\n";
+  LksStream << "  }\n";
   LksStream << "}\n";
   LksStream << "INSERT BEFORE .data\n";
   LksStream.flush();


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r346828 - [HIP] Fix device only compilation

2018-11-13 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Nov 13 20:47:31 2018
New Revision: 346828

URL: http://llvm.org/viewvc/llvm-project?rev=346828&view=rev
Log:
[HIP] Fix device only compilation

Fix a bug causing host code to be compiled when --cuda-device-only is set.

Differential Revision: https://reviews.llvm.org/D54496

Modified:
cfe/trunk/lib/Driver/Driver.cpp
cfe/trunk/test/Driver/cuda-phases.cu

Modified: cfe/trunk/lib/Driver/Driver.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=346828&r1=346827&r2=346828&view=diff
==
--- cfe/trunk/lib/Driver/Driver.cpp (original)
+++ cfe/trunk/lib/Driver/Driver.cpp Tue Nov 13 20:47:31 2018
@@ -2616,17 +2616,19 @@ class OffloadingActionBuilder final {
 C.MakeAction(CudaDeviceActions,
 types::TY_HIP_FATBIN);
 
-DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
-   AssociatedOffloadKind);
-// Clear the fat binary, it is already a dependence to an host
-// action.
-CudaFatBinary = nullptr;
+if (!CompileDeviceOnly) {
+  DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
+ AssociatedOffloadKind);
+  // Clear the fat binary, it is already a dependence to an host
+  // action.
+  CudaFatBinary = nullptr;
+}
 
 // Remove the CUDA actions as they are already connected to an host
 // action or fat binary.
 CudaDeviceActions.clear();
 
-return ABRT_Success;
+return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
   } else if (CurPhase == phases::Link) {
 // Save CudaDeviceActions to DeviceLinkerInputs for each GPU subarch.
 // This happens to each device action originated from each input file.
@@ -3014,8 +3016,10 @@ public:
 }
 
 // If we can use the bundler, replace the host action by the bundling one 
in
-// the resulting list. Otherwise, just append the device actions.
-if (CanUseBundler && !OffloadAL.empty()) {
+// the resulting list. Otherwise, just append the device actions. For
+// device only compilation, HostAction is a null pointer, therefore only do
+// this when HostAction is not a null pointer.
+if (CanUseBundler && HostAction && !OffloadAL.empty()) {
   // Add the host action to the list in order to create the bundling 
action.
   OffloadAL.push_back(HostAction);
 

Modified: cfe/trunk/test/Driver/cuda-phases.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/cuda-phases.cu?rev=346828&r1=346827&r2=346828&view=diff
==
--- cfe/trunk/test/Driver/cuda-phases.cu (original)
+++ cfe/trunk/test/Driver/cuda-phases.cu Tue Nov 13 20:47:31 2018
@@ -157,6 +157,7 @@
 // HBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
 // HBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
 // HBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
+// HBIN-NOT: device
 //
 // Test single gpu architecture up to the assemble phase in host-only
 // compilation mode.
@@ -172,6 +173,7 @@
 // HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, 
(host-[[T]])
 // HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
 // HASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
+// HASM-NOT: device
 
 //
 // Test two gpu architectures with complete compilation in host-only
@@ -190,6 +192,7 @@
 // HBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
 // HBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
 // HBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
+// HBIN2-NOT: device
 
 //
 // Test two gpu architectures up to the assemble phase in host-only
@@ -206,6 +209,7 @@
 // HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, 
(host-[[T]])
 // HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
 // HASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
+// HASM2-NOT: device
 
 //
 // Test single gpu architecture with complete compilation in device-only
@@ -224,7 +228,7 @@
 // DBIN_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], 
[[ARCH]])
 // DBIN_NV-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], 
[[ARCH]])
 // DBIN_NV-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] 
(nvptx64-nvidia-cuda:[[ARCH]])" {[[P4]]}, object
-
+// DBIN-NOT: host
 //
 // Test single gpu architecture up to the assemble phase in device-only
 // compilation mode.
@@ -241,6 +245,7 @@
 // DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
 // DASM_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], 
[[ARCH]])
 // DASM_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] 
([[TRIPLE:nvptx64-nvidia-cuda|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler
+// DASM-NO

r354893 - [OpenCL] Fix assertion due to blocks

2019-02-26 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Feb 26 08:20:41 2019
New Revision: 354893

URL: http://llvm.org/viewvc/llvm-project?rev=354893&view=rev
Log:
[OpenCL] Fix assertion due to blocks

A recent change caused an assertion failure in
CodeGenFunction::EmitBlockCallExpr when a block is called.

There is code

  Func = CGM.getOpenCLRuntime().getInvokeFunction(E->getCallee());
getCalleeDecl calls Expr::getReferencedDeclOfCallee, which does not handle
BlockExpr and returns nullptr, which causes isa to assert.

This patch fixes that.

Differential Revision: https://reviews.llvm.org/D58658

Modified:
cfe/trunk/lib/AST/Expr.cpp
cfe/trunk/test/CodeGenOpenCL/blocks.cl

Modified: cfe/trunk/lib/AST/Expr.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/Expr.cpp?rev=354893&r1=354892&r2=354893&view=diff
==
--- cfe/trunk/lib/AST/Expr.cpp (original)
+++ cfe/trunk/lib/AST/Expr.cpp Tue Feb 26 08:20:41 2019
@@ -1358,6 +1358,8 @@ Decl *Expr::getReferencedDeclOfCallee()
 return DRE->getDecl();
   if (MemberExpr *ME = dyn_cast<MemberExpr>(CEE))
 return ME->getMemberDecl();
+  if (auto *BE = dyn_cast<BlockExpr>(CEE))
+return BE->getBlockDecl();
 
   return nullptr;
 }

Modified: cfe/trunk/test/CodeGenOpenCL/blocks.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/blocks.cl?rev=354893&r1=354892&r2=354893&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/blocks.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/blocks.cl Tue Feb 26 08:20:41 2019
@@ -90,6 +90,12 @@ int get42() {
   return blockArgFunc(^{return 42;});
 }
 
+// COMMON-LABEL: define {{.*}}@call_block
+// call {{.*}}@__call_block_block_invoke
+int call_block() {
+  return ^int(int num) { return num; } (11);
+}
+
 // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__size"
 // CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__align"
 


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r354929 - [CUDA][HIP] Check calling convention based on function target

2019-02-26 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Feb 26 14:24:49 2019
New Revision: 354929

URL: http://llvm.org/viewvc/llvm-project?rev=354929&view=rev
Log:
[CUDA][HIP] Check calling convention based on function target

MSVC header files use vectorcall to differentiate overloaded functions, which
causes failures for the AMDGPU target. This is because clang does not check the
function calling convention based on the function's target.

This patch checks calling convention using the proper target info.

Differential Revision: https://reviews.llvm.org/D57716

Added:
cfe/trunk/test/SemaCUDA/amdgpu-windows-vectorcall.cu
Modified:
cfe/trunk/lib/Sema/SemaDeclAttr.cpp

Modified: cfe/trunk/lib/Sema/SemaDeclAttr.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclAttr.cpp?rev=354929&r1=354928&r2=354929&view=diff
==
--- cfe/trunk/lib/Sema/SemaDeclAttr.cpp (original)
+++ cfe/trunk/lib/Sema/SemaDeclAttr.cpp Tue Feb 26 14:24:49 2019
@@ -4615,8 +4615,36 @@ bool Sema::CheckCallingConvAttr(const Pa
   default: llvm_unreachable("unexpected attribute kind");
   }
 
+  TargetInfo::CallingConvCheckResult A = TargetInfo::CCCR_OK;
   const TargetInfo &TI = Context.getTargetInfo();
-  TargetInfo::CallingConvCheckResult A = TI.checkCallingConvention(CC);
+  auto *Aux = Context.getAuxTargetInfo();
+  if (LangOpts.CUDA) {
+auto CudaTarget = IdentifyCUDATarget(FD);
+bool CheckHost = false, CheckDevice = false;
+switch (CudaTarget) {
+case CFT_HostDevice:
+  CheckHost = true;
+  CheckDevice = true;
+  break;
+case CFT_Host:
+  CheckHost = true;
+  break;
+case CFT_Device:
+case CFT_Global:
+  CheckDevice = true;
+  break;
+case CFT_InvalidTarget:
+  llvm_unreachable("unexpected cuda target");
+}
+auto *HostTI = LangOpts.CUDAIsDevice ? Aux : &TI;
+auto *DeviceTI = LangOpts.CUDAIsDevice ? &TI : Aux;
+if (CheckHost && HostTI)
+  A = HostTI->checkCallingConvention(CC);
+if (A == TargetInfo::CCCR_OK && CheckDevice && DeviceTI)
+  A = DeviceTI->checkCallingConvention(CC);
+  } else {
+A = TI.checkCallingConvention(CC);
+  }
   if (A != TargetInfo::CCCR_OK) {
 if (A == TargetInfo::CCCR_Warning)
   Diag(Attrs.getLoc(), diag::warn_cconv_ignored) << Attrs;

Added: cfe/trunk/test/SemaCUDA/amdgpu-windows-vectorcall.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/amdgpu-windows-vectorcall.cu?rev=354929&view=auto
==
--- cfe/trunk/test/SemaCUDA/amdgpu-windows-vectorcall.cu (added)
+++ cfe/trunk/test/SemaCUDA/amdgpu-windows-vectorcall.cu Tue Feb 26 14:24:49 
2019
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple 
x86_64-pc-windows-msvc -fms-compatibility -fcuda-is-device -fsyntax-only 
-verify %s
+
+__cdecl void hostf1();
+__vectorcall void (*hostf2)() = hostf1; // expected-error {{cannot initialize 
a variable of type 'void ((*))() __attribute__((vectorcall))' with an lvalue of 
type 'void () __attribute__((cdecl))'}}


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r354948 - [HIP] change kernel stub name

2019-02-26 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Feb 26 18:02:52 2019
New Revision: 354948

URL: http://llvm.org/viewvc/llvm-project?rev=354948&view=rev
Log:
[HIP] change kernel stub name

Add .stub to the kernel stub function name so that it is different from the
kernel name in device code. This is necessary to let the debugger find the
correct symbol for the kernel.

Differential Revision: https://reviews.llvm.org/D58518

Added:
cfe/trunk/test/CodeGenCUDA/kernel-stub-name.cu
Modified:
cfe/trunk/lib/CodeGen/CGCUDANV.cpp
cfe/trunk/lib/CodeGen/CodeGenModule.cpp

Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=354948&r1=354947&r2=354948&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Tue Feb 26 18:02:52 2019
@@ -218,6 +218,7 @@ std::string CGNVCUDARuntime::getDeviceSi
 void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
  FunctionArgList &Args) {
   assert(getDeviceSideName(CGF.CurFuncDecl) == CGF.CurFn->getName() ||
+ getDeviceSideName(CGF.CurFuncDecl) + ".stub" == CGF.CurFn->getName() 
||
  CGF.CGM.getContext().getTargetInfo().getCXXABI() !=
  CGF.CGM.getContext().getAuxTargetInfo()->getCXXABI());
 

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=354948&r1=354947&r2=354948&view=diff
==
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Tue Feb 26 18:02:52 2019
@@ -1048,8 +1048,17 @@ StringRef CodeGenModule::getMangledName(
 
   // Keep the first result in the case of a mangling collision.
   const auto *ND = cast<NamedDecl>(GD.getDecl());
-  auto Result =
-  Manglings.insert(std::make_pair(getMangledNameImpl(*this, GD, ND), GD));
+  std::string MangledName = getMangledNameImpl(*this, GD, ND);
+
+  // Postfix kernel stub names with .stub to differentiate them from kernel
+  // names in device binaries. This is to facilitate the debugger to find
+  // the correct symbols for kernels in the device binary.
+  if (auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
+if (getLangOpts().HIP && !getLangOpts().CUDAIsDevice &&
+FD->hasAttr<CUDAGlobalAttr>())
+  MangledName = MangledName + ".stub";
+
+  auto Result = Manglings.insert(std::make_pair(MangledName, GD));
   return MangledDeclNames[CanonicalGD] = Result.first->first();
 }
 

Added: cfe/trunk/test/CodeGenCUDA/kernel-stub-name.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/kernel-stub-name.cu?rev=354948&view=auto
==
--- cfe/trunk/test/CodeGenCUDA/kernel-stub-name.cu (added)
+++ cfe/trunk/test/CodeGenCUDA/kernel-stub-name.cu Tue Feb 26 18:02:52 2019
@@ -0,0 +1,20 @@
+// RUN: echo "GPU binary would be here" > %t
+
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
+// RUN: -fcuda-include-gpubinary %t -o - -x hip\
+// RUN:   | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CHECK
+
+#include "Inputs/cuda.h"
+
+template<typename T>
+__global__ void kernelfunc() {}
+
+// CHECK-LABEL: define{{.*}}@_Z8hostfuncv()
+// CHECK: call void @[[STUB:_Z10kernelfuncIiEvv.stub]]()
+void hostfunc(void) { kernelfunc<int><<<1, 1>>>(); }
+
+// CHECK: define{{.*}}@[[STUB]]
+// CHECK: call{{.*}}@hipLaunchByPtr{{.*}}@[[STUB]]
+
+// CHECK-LABEL: define{{.*}}@__hip_register_globals
+// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[STUB]]


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r354990 - [NFC] minor revision of r354929 [CUDA][HIP] Check calling convention based on function target

2019-02-27 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Feb 27 07:46:29 2019
New Revision: 354990

URL: http://llvm.org/viewvc/llvm-project?rev=354990&view=rev
Log:
[NFC] minor revision of r354929 [CUDA][HIP] Check calling convention based on 
function target

Add comments and move a variable to if block.

Differential Revision: https://reviews.llvm.org/D57716

Modified:
cfe/trunk/lib/Sema/SemaDeclAttr.cpp

Modified: cfe/trunk/lib/Sema/SemaDeclAttr.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclAttr.cpp?rev=354990&r1=354989&r2=354990&view=diff
==
--- cfe/trunk/lib/Sema/SemaDeclAttr.cpp (original)
+++ cfe/trunk/lib/Sema/SemaDeclAttr.cpp Wed Feb 27 07:46:29 2019
@@ -4617,8 +4617,12 @@ bool Sema::CheckCallingConvAttr(const Pa
 
   TargetInfo::CallingConvCheckResult A = TargetInfo::CCCR_OK;
   const TargetInfo &TI = Context.getTargetInfo();
-  auto *Aux = Context.getAuxTargetInfo();
+  // CUDA functions may have host and/or device attributes which indicate
+  // their targeted execution environment, therefore the calling convention
+  // of functions in CUDA should be checked against the target deduced based
+  // on their host/device attributes.
   if (LangOpts.CUDA) {
+auto *Aux = Context.getAuxTargetInfo();
 auto CudaTarget = IdentifyCUDATarget(FD);
 bool CheckHost = false, CheckDevice = false;
 switch (CudaTarget) {


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r355106 - Partial revert of r353952: [HIP] Handle compile -m options and propagate into LLC

2019-02-28 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu Feb 28 09:08:26 2019
New Revision: 355106

URL: http://llvm.org/viewvc/llvm-project?rev=355106&view=rev
Log:
Partial revert of r353952: [HIP] Handle compile -m options and propagate into 
LLC

Remove comments and tests about passing -mcode-object-v3 to the driver, since it
does not work. Other -m options are OK.

Also put back -mattr=-code-object-v3, since HIP is still not ready for code
object v3.

Differential Revision: https://reviews.llvm.org/D57977

Modified:
cfe/trunk/lib/Driver/ToolChains/HIP.cpp
cfe/trunk/test/Driver/hip-toolchain-features.hip

Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/HIP.cpp?rev=355106&r1=355105&r2=355106&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/HIP.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/HIP.cpp Thu Feb 28 09:08:26 2019
@@ -159,7 +159,7 @@ const char *AMDGCN::Linker::constructLlc
 llvm::StringRef OutputFilePrefix, const char *InputFileName) const {
   // Construct llc command.
   ArgStringList LlcArgs{InputFileName, "-mtriple=amdgcn-amd-amdhsa",
-"-filetype=obj",
+"-filetype=obj", "-mattr=-code-object-v3",
 Args.MakeArgString("-mcpu=" + SubArchName)};
 
   // Extract all the -m options
@@ -167,7 +167,7 @@ const char *AMDGCN::Linker::constructLlc
   handleTargetFeaturesGroup(
 Args, Features, options::OPT_m_amdgpu_Features_Group);
 
-  // Add features to mattr such as code-object-v3 and xnack
+  // Add features to mattr such as xnack
   std::string MAttrString = "-mattr=";
   for(auto OneFeature : Features) {
 MAttrString.append(Args.MakeArgString(OneFeature));

Modified: cfe/trunk/test/Driver/hip-toolchain-features.hip
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-toolchain-features.hip?rev=355106&r1=355105&r2=355106&view=diff
==
--- cfe/trunk/test/Driver/hip-toolchain-features.hip (original)
+++ cfe/trunk/test/Driver/hip-toolchain-features.hip Thu Feb 28 09:08:26 2019
@@ -4,17 +4,6 @@
 
 // RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \
 // RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
-// RUN:   -mcode-object-v3 2>&1 | FileCheck %s -check-prefix=COV3
-// RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \
-// RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
-// RUN:   -mno-code-object-v3 2>&1 | FileCheck %s -check-prefix=NOCOV3
-
-// COV3: {{.*}}clang{{.*}}"-target-feature" "+code-object-v3"
-// NOCOV3: {{.*}}clang{{.*}}"-target-feature" "-code-object-v3"
-
-
-// RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \
-// RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
 // RUN:   -mxnack 2>&1 | FileCheck %s -check-prefix=XNACK
 // RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \
 // RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
@@ -37,12 +26,12 @@
 
 // RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \
 // RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
-// RUN:   -mcode-object-v3 -mxnack -msram-ecc \
+// RUN:   -mxnack -msram-ecc \
 // RUN:   2>&1 | FileCheck %s -check-prefix=ALL3
 // RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \
 // RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
-// RUN:   -mno-code-object-v3 -mno-xnack -mno-sram-ecc \
+// RUN:   -mno-xnack -mno-sram-ecc \
 // RUN:   2>&1 | FileCheck %s -check-prefix=NOALL3
 
-// ALL3: {{.*}}clang{{.*}}"-target-feature" "+code-object-v3" 
"-target-feature" "+xnack" "-target-feature" "+sram-ecc"
-// NOALL3: {{.*}}clang{{.*}}"-target-feature" "-code-object-v3" 
"-target-feature" "-xnack" "-target-feature" "-sram-ecc"
+// ALL3: {{.*}}clang{{.*}}"-target-feature" "+xnack" "-target-feature" 
"+sram-ecc"
+// NOALL3: {{.*}}clang{{.*}}"-target-feature" "-xnack" "-target-feature" 
"-sram-ecc"


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r355410 - [HIP] Do not unbundle object files for -fno-gpu-rdc

2019-03-05 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Mar  5 08:07:56 2019
New Revision: 355410

URL: http://llvm.org/viewvc/llvm-project?rev=355410&view=rev
Log:
[HIP] Do not unbundle object files for -fno-gpu-rdc

When -fno-gpu-rdc is set, device code is compiled, linked, and assembled into a
fat binary and embedded as a string in the object files. These object files are
normal object files which can be linked by the host linker. In the linking
stage, the object files should not be unbundled when -fno-gpu-rdc is set, since
they are normal object files, not bundles. The object files only need to be
unbundled when -fgpu-rdc is set.

Currently clang always unbundles object files, disregarding the -fgpu-rdc option.

This patch fixes that.

Differential Revision: https://reviews.llvm.org/D58917

Modified:
cfe/trunk/lib/Driver/Driver.cpp
cfe/trunk/test/Driver/hip-binding.hip
cfe/trunk/test/Driver/hip-link-shared-library.hip

Modified: cfe/trunk/lib/Driver/Driver.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=355410&r1=355409&r2=355410&view=diff
==
--- cfe/trunk/lib/Driver/Driver.cpp (original)
+++ cfe/trunk/lib/Driver/Driver.cpp Tue Mar  5 08:07:56 2019
@@ -2293,6 +2293,9 @@ class OffloadingActionBuilder final {
 
 /// Flag that is set to true if this builder acted on the current input.
 bool IsActive = false;
+
+/// Flag for -fgpu-rdc.
+bool Relocatable = false;
   public:
 CudaActionBuilderBase(Compilation &C, DerivedArgList &Args,
   const Driver::InputList &Inputs,
@@ -2338,6 +2341,12 @@ class OffloadingActionBuilder final {
 
   // If this is an unbundling action use it as is for each CUDA toolchain.
   if (auto *UA = dyn_cast(HostAction)) {
+
+// If -fgpu-rdc is disabled, should not unbundle since there is no
+// device code to link.
+if (!Relocatable)
+  return ABRT_Inactive;
+
 CudaDeviceActions.clear();
 auto *IA = cast(UA->getInputs().back());
 std::string FileName = IA->getInputArg().getAsString(Args);
@@ -2409,6 +2418,9 @@ class OffloadingActionBuilder final {
   !C.hasOffloadToolChain())
 return false;
 
+  Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
+  options::OPT_fno_gpu_rdc, /*Default=*/false);
+
   const ToolChain *HostTC = 
C.getSingleOffloadToolChain();
   assert(HostTC && "No toolchain for host compilation.");
   if (HostTC->getTriple().isNVPTX() ||
@@ -2594,13 +2606,11 @@ class OffloadingActionBuilder final {
   class HIPActionBuilder final : public CudaActionBuilderBase {
 /// The linker inputs obtained for each device arch.
 SmallVector DeviceLinkerInputs;
-bool Relocatable;
 
   public:
 HIPActionBuilder(Compilation &C, DerivedArgList &Args,
  const Driver::InputList &Inputs)
-: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP),
-  Relocatable(false) {}
+: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {}
 
 bool canUseBundlerUnbundler() const override { return true; }
 
@@ -2705,13 +2715,6 @@ class OffloadingActionBuilder final {
 ++I;
   }
 }
-
-bool initialize() override {
-  Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
-  options::OPT_fno_gpu_rdc, /*Default=*/false);
-
-  return CudaActionBuilderBase::initialize();
-}
   };
 
   /// OpenMP action builder. The host bitcode is passed to the device frontend

Modified: cfe/trunk/test/Driver/hip-binding.hip
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-binding.hip?rev=355410&r1=355409&r2=355410&view=diff
==
--- cfe/trunk/test/Driver/hip-binding.hip (original)
+++ cfe/trunk/test/Driver/hip-binding.hip Tue Mar  5 08:07:56 2019
@@ -4,7 +4,7 @@
 
 // RUN: touch %t.o
 // RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\
+// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %t.o\
 // RUN: 2>&1 | FileCheck %s
 
 // CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN:.*o]]"], 
outputs: ["[[OBJ1:.*o]]", "[[OBJ2:.*o]]", "[[OBJ3:.*o]]"] 
@@ -13,3 +13,10 @@
 // CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ3]]"], 
output: "[[IMG3:.*out]]"
 // CHECK-NOT: offload bundler
 // CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[OBJ1]]", 
"[[IMG2]]", "[[IMG3]]"], output: "a.out"
+
+// RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \
+// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\
+// RUN: 2>&1 | FileCheck -check-prefix=NORDC %s
+
+// NORDC-NOT: offload bundler
+// NORDC: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["{{.*o}}"], 
output: "a.out"

Modified: cfe/trunk/test/Driver/hip-link-shared-library.hip
URL: 
http://llvm.org

r355419 - Allow bundle size to be 0 in clang-offload-bundler

2019-03-05 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Mar  5 09:52:32 2019
New Revision: 355419

URL: http://llvm.org/viewvc/llvm-project?rev=355419&view=rev
Log:
Allow bundle size to be 0 in clang-offload-bundler

HIP uses clang-offload-bundler to create fat binary. The bundle for host is 
empty.
Currently clang-offload-bundler checks if the bundle size is 0 when unbundling.
If so, it will exit without unbundling the remaining bundles. This prevents
clang-offload-bundler from unbundling fat binaries generated for HIP.

This patch allows the bundle size to be 0 when clang-offload-bundler unbundles
input files.

Differential Revision: https://reviews.llvm.org/D58057

Modified:
cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp

Modified: cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp?rev=355419&r1=355418&r2=355419&view=diff
==
--- cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp (original)
+++ cfe/trunk/tools/clang-offload-bundler/ClangOffloadBundler.cpp Tue Mar  5 
09:52:32 2019
@@ -292,7 +292,7 @@ public:
   ReadChars += TripleSize;
 
   // Check if the offset and size make sense.
-  if (!Size || !Offset || Offset + Size > FC.size())
+  if (!Offset || Offset + Size > FC.size())
 return;
 
   assert(BundlesInfo.find(Triple) == BundlesInfo.end() &&


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r355421 - [CUDA][HIP][Sema] Fix template kernel with function as template parameter

2019-03-05 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Mar  5 10:19:35 2019
New Revision: 355421

URL: http://llvm.org/viewvc/llvm-project?rev=355421&view=rev
Log:
[CUDA][HIP][Sema] Fix template kernel with function as template parameter

If a kernel template has a function as its template parameter, a device 
function should be
allowed as template argument since a kernel can call a device function. However,
currently if the kernel template is instantiated in a host function, clang will 
emit an error
message saying the device function is an invalid candidate for the template 
parameter.

This happens because clang checks the reference to the device function during 
parsing
the template arguments. At this point, the template is not instantiated yet. 
Clang incorrectly
assumes the device function is called by the host function and emits the error 
message.

This patch fixes the issue by disabling checking of device function during 
parsing template
arguments and deferring the check to the instantiation of the template. At that 
point, the
template decl is already available, therefore the check can be done against the 
instantiated
function template decl.

Differential Revision: https://reviews.llvm.org/D56411

Modified:
cfe/trunk/lib/Sema/SemaCUDA.cpp
cfe/trunk/lib/Sema/SemaExpr.cpp
cfe/trunk/test/SemaCUDA/call-device-fn-from-host.cu
cfe/trunk/test/SemaCUDA/call-host-fn-from-device.cu

Modified: cfe/trunk/lib/Sema/SemaCUDA.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaCUDA.cpp?rev=355421&r1=355420&r2=355421&view=diff
==
--- cfe/trunk/lib/Sema/SemaCUDA.cpp (original)
+++ cfe/trunk/lib/Sema/SemaCUDA.cpp Tue Mar  5 10:19:35 2019
@@ -675,6 +675,11 @@ Sema::DeviceDiagBuilder Sema::CUDADiagIf
 bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA 
compilation");
   assert(Callee && "Callee may not be null.");
+
+  auto &ExprEvalCtx = ExprEvalContexts.back();
+  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
+return true;
+
   // FIXME: Is bailing out early correct here?  Should we instead assume that
   // the caller is a global initializer?
   FunctionDecl *Caller = dyn_cast(CurContext);

Modified: cfe/trunk/lib/Sema/SemaExpr.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=355421&r1=355420&r2=355421&view=diff
==
--- cfe/trunk/lib/Sema/SemaExpr.cpp (original)
+++ cfe/trunk/lib/Sema/SemaExpr.cpp Tue Mar  5 10:19:35 2019
@@ -14799,6 +14799,9 @@ void Sema::MarkFunctionReferenced(Source
   if (FPT && isUnresolvedExceptionSpec(FPT->getExceptionSpecType()))
 ResolveExceptionSpec(Loc, FPT);
 
+  if (getLangOpts().CUDA)
+CheckCUDACall(Loc, Func);
+
   // If we don't need to mark the function as used, and we don't need to
   // try to provide a definition, there's nothing more to do.
   if ((Func->isUsed(/*CheckUsedAttr=*/false) || !OdrUse) &&

Modified: cfe/trunk/test/SemaCUDA/call-device-fn-from-host.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/call-device-fn-from-host.cu?rev=355421&r1=355420&r2=355421&view=diff
==
--- cfe/trunk/test/SemaCUDA/call-device-fn-from-host.cu (original)
+++ cfe/trunk/test/SemaCUDA/call-device-fn-from-host.cu Tue Mar  5 10:19:35 2019
@@ -37,7 +37,7 @@ __host__ __device__ void T::hd3() {
 }
 
 template  __host__ __device__ void hd2() { device_fn(); }
-// expected-error@-1 {{reference to __device__ function 'device_fn' in 
__host__ __device__ function}}
+// expected-error@-1 2 {{reference to __device__ function 'device_fn' in 
__host__ __device__ function}}
 void host_fn() { hd2(); }
 
 __host__ __device__ void hd() { device_fn(); }
@@ -90,3 +90,8 @@ __host__ __device__ void fn_ptr_template
 static __host__ __device__ void hd_func() { device_fn(); }
 __global__ void kernel() { hd_func(); }
 void host_func(void) { kernel<<<1, 1>>>(); }
+
+// Should allow host function call kernel template with device function 
argument.
+__device__ void f();
+template __global__ void t() { F(); }
+__host__ void g() { t<<<1,1>>>(); }

Modified: cfe/trunk/test/SemaCUDA/call-host-fn-from-device.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/call-host-fn-from-device.cu?rev=355421&r1=355420&r2=355421&view=diff
==
--- cfe/trunk/test/SemaCUDA/call-host-fn-from-device.cu (original)
+++ cfe/trunk/test/SemaCUDA/call-host-fn-from-device.cu Tue Mar  5 10:19:35 2019
@@ -56,14 +56,14 @@ __host__ __device__ void T::hd3() {
 }
 
 template  __host__ __device__ void hd2() { host_fn(); }
-// expected-error@-1 {{reference to __host__ function 'host_fn' in __host__ 
__device__ function}}
+// expected-error@-1 2 {{reference to __host__ f

r358290 - [HIP] Use -mlink-builtin-bitcode to link device library

2019-04-12 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri Apr 12 09:23:31 2019
New Revision: 358290

URL: http://llvm.org/viewvc/llvm-project?rev=358290&view=rev
Log:
[HIP] Use -mlink-builtin-bitcode to link device library

Use -mlink-builtin-bitcode instead of llvm-link to link
device library so that device library bitcode and user
device code can be compiled in a consistent way.

This is the same approach used by CUDA and OpenMP.

Differential Revision: https://reviews.llvm.org/D60513

Modified:
cfe/trunk/lib/Driver/ToolChains/HIP.cpp
cfe/trunk/test/Driver/hip-device-libs.hip
cfe/trunk/test/Driver/hip-toolchain-no-rdc.hip
cfe/trunk/test/Driver/hip-toolchain-rdc.hip

Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/HIP.cpp?rev=358290&r1=358289&r2=358290&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/HIP.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/HIP.cpp Fri Apr 12 09:23:31 2019
@@ -31,7 +31,7 @@ using namespace llvm::opt;
 
 namespace {
 
-static void addBCLib(Compilation &C, const ArgList &Args,
+static void addBCLib(const Driver &D, const ArgList &Args,
  ArgStringList &CmdArgs, ArgStringList LibraryPaths,
  StringRef BCName) {
   StringRef FullName;
@@ -40,11 +40,12 @@ static void addBCLib(Compilation &C, con
 llvm::sys::path::append(Path, BCName);
 FullName = Path;
 if (llvm::sys::fs::exists(FullName)) {
+  CmdArgs.push_back("-mlink-builtin-bitcode");
   CmdArgs.push_back(Args.MakeArgString(FullName));
   return;
 }
   }
-  C.getDriver().Diag(diag::err_drv_no_such_file) << BCName;
+  D.Diag(diag::err_drv_no_such_file) << BCName;
 }
 
 } // namespace
@@ -58,44 +59,6 @@ const char *AMDGCN::Linker::constructLLV
   for (const auto &II : Inputs)
 CmdArgs.push_back(II.getFilename());
 
-  ArgStringList LibraryPaths;
-
-  // Find in --hip-device-lib-path and HIP_LIBRARY_PATH.
-  for (auto Path : Args.getAllArgValues(options::OPT_hip_device_lib_path_EQ))
-LibraryPaths.push_back(Args.MakeArgString(Path));
-
-  addDirectoryList(Args, LibraryPaths, "-L", "HIP_DEVICE_LIB_PATH");
-
-  llvm::SmallVector BCLibs;
-
-  // Add bitcode library in --hip-device-lib.
-  for (auto Lib : Args.getAllArgValues(options::OPT_hip_device_lib_EQ)) {
-BCLibs.push_back(Args.MakeArgString(Lib));
-  }
-
-  // If --hip-device-lib is not set, add the default bitcode libraries.
-  if (BCLibs.empty()) {
-// Get the bc lib file name for ISA version. For example,
-// gfx803 => oclc_isa_version_803.amdgcn.bc.
-std::string ISAVerBC =
-"oclc_isa_version_" + SubArchName.drop_front(3).str() + ".amdgcn.bc";
-
-llvm::StringRef FlushDenormalControlBC;
-if (Args.hasArg(options::OPT_fcuda_flush_denormals_to_zero))
-  FlushDenormalControlBC = "oclc_daz_opt_on.amdgcn.bc";
-else
-  FlushDenormalControlBC = "oclc_daz_opt_off.amdgcn.bc";
-
-BCLibs.append({"hip.amdgcn.bc", "opencl.amdgcn.bc",
-   "ocml.amdgcn.bc", "ockl.amdgcn.bc",
-   "oclc_finite_only_off.amdgcn.bc",
-   FlushDenormalControlBC,
-   "oclc_correctly_rounded_sqrt_on.amdgcn.bc",
-   "oclc_unsafe_math_off.amdgcn.bc", ISAVerBC});
-  }
-  for (auto Lib : BCLibs)
-addBCLib(C, Args, CmdArgs, LibraryPaths, Lib);
-
   // Add an intermediate output file.
   CmdArgs.push_back("-o");
   std::string TmpName =
@@ -324,6 +287,44 @@ void HIPToolChain::addClangTargetOptions
 CC1Args.append({"-fvisibility", "hidden"});
 CC1Args.push_back("-fapply-global-visibility-to-externs");
   }
+  ArgStringList LibraryPaths;
+
+  // Find in --hip-device-lib-path and HIP_LIBRARY_PATH.
+  for (auto Path :
+   DriverArgs.getAllArgValues(options::OPT_hip_device_lib_path_EQ))
+LibraryPaths.push_back(DriverArgs.MakeArgString(Path));
+
+  addDirectoryList(DriverArgs, LibraryPaths, "-L", "HIP_DEVICE_LIB_PATH");
+
+  llvm::SmallVector BCLibs;
+
+  // Add bitcode library in --hip-device-lib.
+  for (auto Lib : DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ)) {
+BCLibs.push_back(DriverArgs.MakeArgString(Lib));
+  }
+
+  // If --hip-device-lib is not set, add the default bitcode libraries.
+  if (BCLibs.empty()) {
+// Get the bc lib file name for ISA version. For example,
+// gfx803 => oclc_isa_version_803.amdgcn.bc.
+std::string ISAVerBC =
+"oclc_isa_version_" + GpuArch.drop_front(3).str() + ".amdgcn.bc";
+
+llvm::StringRef FlushDenormalControlBC;
+if (DriverArgs.hasArg(options::OPT_fcuda_flush_denormals_to_zero))
+  FlushDenormalControlBC = "oclc_daz_opt_on.amdgcn.bc";
+else
+  FlushDenormalControlBC = "oclc_daz_opt_off.amdgcn.bc";
+
+BCLibs.append({"hip.amdgcn.bc", "opencl.amdgcn.bc", "ocml.amdgcn.bc",
+   "ockl.amdgcn.bc", "oclc_finite_only_off.amdgcn.bc",
+   FlushDenormalCont

r359594 - AMDGPU: Enable _Float16

2019-04-30 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Apr 30 11:35:37 2019
New Revision: 359594

URL: http://llvm.org/viewvc/llvm-project?rev=359594&view=rev
Log:
AMDGPU: Enable _Float16

Added:
cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp
Modified:
cfe/trunk/lib/Basic/Targets/AMDGPU.cpp

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=359594&r1=359593&r2=359594&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Tue Apr 30 11:35:37 2019
@@ -252,6 +252,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const
  !isAMDGCN(Triple));
   UseAddrSpaceMapMangling = true;
 
+  HasLegalHalfType = true;
+  HasFloat16 = true;
+
   // Set pointer width and alignment for target address space 0.
   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
   if (getMaxPointerWidth() == 64) {

Added: cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp?rev=359594&view=auto
==
--- cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp (added)
+++ cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp Tue Apr 30 11:35:37 2019
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx701 -S -o - %s | 
FileCheck %s -check-prefix=NOF16
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx803 -S -o - %s | 
FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -S -o - %s | 
FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -S -o - %s | 
FileCheck %s
+void f() {
+  _Float16 x, y, z;
+  // CHECK: v_add_f16_e64
+  // NOF16: v_add_f32_e64
+  z = x + y;
+  // CHECK: v_sub_f16_e64
+  // NOF16: v_sub_f32_e64
+  z = x - y;
+  // CHECK: v_mul_f16_e64
+  // NOF16: v_mul_f32_e64
+  z = x * y;
+  // CHECK: v_div_fixup_f16
+  // NOF16: v_div_fixup_f32
+  z = x / y;
+}


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r359598 - Add requires amdgpu-registered-target for amdgpu-float16.cpp

2019-04-30 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Apr 30 12:06:15 2019
New Revision: 359598

URL: http://llvm.org/viewvc/llvm-project?rev=359598&view=rev
Log:
Add requires amdgpu-registered-target for amdgpu-float16.cpp

Modified:
cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp

Modified: cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp?rev=359598&r1=359597&r2=359598&view=diff
==
--- cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/amdgpu-float16.cpp Tue Apr 30 12:06:15 2019
@@ -1,3 +1,4 @@
+// REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx701 -S -o - %s | 
FileCheck %s -check-prefix=NOF16
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx803 -S -o - %s | 
FileCheck %s
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -S -o - %s | 
FileCheck %s


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r343611 - [HIP] Support early finalization of device code for -fno-gpu-rdc

2018-10-02 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Oct  2 10:48:54 2018
New Revision: 343611

URL: http://llvm.org/viewvc/llvm-project?rev=343611&view=rev
Log:
[HIP] Support early finalization of device code for -fno-gpu-rdc

This patch renames -f{no-}cuda-rdc to -f{no-}gpu-rdc and keeps the original
options as aliases. When -fgpu-rdc is off,
clang will assume the device code in each translation unit does not call
external functions except those in the device library, therefore it is possible
to compile the device code in each translation unit to self-contained kernels
and embed them in the host object, so that the host object behaves like
usual host object which can be linked by lld.

The benefits of this feature are: 1. allowing users to create static libraries which
can be linked by the host linker; 2. amortizing device code linking time.

This patch modifies HIP action builder to insert actions for linking device
code and generating HIP fatbin, and pass HIP fatbin to host backend action.
It extracts code for constructing command for generating HIP fatbin as
a function so that it can be reused by early finalization. It also modifies
codegen of HIP host constructor functions to embed the device fatbin
when it is available.

Differential Revision: https://reviews.llvm.org/D52377

Added:
cfe/trunk/test/Driver/hip-toolchain-no-rdc.hip
cfe/trunk/test/Driver/hip-toolchain-rdc.hip
Removed:
cfe/trunk/test/Driver/hip-toolchain.hip
Modified:
cfe/trunk/include/clang/Basic/LangOptions.def
cfe/trunk/include/clang/Driver/Options.td
cfe/trunk/include/clang/Driver/Types.def
cfe/trunk/lib/AST/Decl.cpp
cfe/trunk/lib/CodeGen/CGCUDANV.cpp
cfe/trunk/lib/Driver/Driver.cpp
cfe/trunk/lib/Driver/ToolChains/Clang.cpp
cfe/trunk/lib/Driver/ToolChains/CommonArgs.cpp
cfe/trunk/lib/Driver/ToolChains/Cuda.cpp
cfe/trunk/lib/Driver/ToolChains/HIP.cpp
cfe/trunk/lib/Driver/ToolChains/HIP.h
cfe/trunk/lib/Frontend/CompilerInvocation.cpp
cfe/trunk/lib/Sema/SemaDeclAttr.cpp
cfe/trunk/test/CodeGenCUDA/device-stub.cu
cfe/trunk/test/Driver/cuda-external-tools.cu
cfe/trunk/test/Driver/cuda-phases.cu
cfe/trunk/test/Driver/hip-output-file-name.hip
cfe/trunk/test/SemaCUDA/extern-shared.cu

Modified: cfe/trunk/include/clang/Basic/LangOptions.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/LangOptions.def?rev=343611&r1=343610&r2=343611&view=diff
==
--- cfe/trunk/include/clang/Basic/LangOptions.def (original)
+++ cfe/trunk/include/clang/Basic/LangOptions.def Tue Oct  2 10:48:54 2018
@@ -211,7 +211,7 @@ LANGOPT(CUDAIsDevice  , 1, 0, "compi
 LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "allowing variadic functions in CUDA 
device code")
 LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr 
functions as __host__ __device__")
 LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate 
transcendental functions")
-LANGOPT(CUDARelocatableDeviceCode, 1, 0, "generate relocatable device code")
+LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code")
 
 LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
 LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")

Modified: cfe/trunk/include/clang/Driver/Options.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=343611&r1=343610&r2=343611&view=diff
==
--- cfe/trunk/include/clang/Driver/Options.td (original)
+++ cfe/trunk/include/clang/Driver/Options.td Tue Oct  2 10:48:54 2018
@@ -584,9 +584,11 @@ def fno_cuda_flush_denormals_to_zero : F
 def fcuda_approx_transcendentals : Flag<["-"], "fcuda-approx-transcendentals">,
   Flags<[CC1Option]>, HelpText<"Use approximate transcendental functions">;
 def fno_cuda_approx_transcendentals : Flag<["-"], 
"fno-cuda-approx-transcendentals">;
-def fcuda_rdc : Flag<["-"], "fcuda-rdc">, Flags<[CC1Option]>,
+def fgpu_rdc : Flag<["-"], "fgpu-rdc">, Flags<[CC1Option]>,
   HelpText<"Generate relocatable device code, also known as separate 
compilation mode.">;
-def fno_cuda_rdc : Flag<["-"], "fno-cuda-rdc">;
+def fno_gpu_rdc : Flag<["-"], "fno-gpu-rdc">;
+def : Flag<["-"], "fcuda-rdc">, Alias;
+def : Flag<["-"], "fno-cuda-rdc">, Alias;
 def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>,
   HelpText<"Use 32-bit pointers for accessing const/local/shared address 
spaces.">;
 def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">;

Modified: cfe/trunk/include/clang/Driver/Types.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Types.def?rev=343611&r1=343610&r2=343611&view=diff
==
--- cfe/trunk/include/clang/Driver/Types.def (original)
+++ cfe/trunk/include/clang/Driver/Types.def Tue Oct  2 10:48:54 2018
@@ -101,4 +101,5 @@ TYPE("image",

r352801 - Do not copy long double and 128-bit fp format from aux target for AMDGPU

2019-01-31 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu Jan 31 13:57:51 2019
New Revision: 352801

URL: http://llvm.org/viewvc/llvm-project?rev=352801&view=rev
Log:
Do not copy long double and 128-bit fp format from aux target for AMDGPU

rC352620 caused regressions because it copied floating point format from
aux target.

floating point format decides whether extended long double is supported.
It is x86_fp80 on x86 but IEEE double on amdgcn.

Document usage of long double type in HIP programming guide 
https://github.com/ROCm-Developer-Tools/HIP/pull/890

Differential Revision: https://reviews.llvm.org/D57527

Added:
cfe/trunk/test/CodeGenCUDA/types.cu
Modified:
cfe/trunk/lib/Basic/Targets/AMDGPU.cpp

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.cpp?rev=352801&r1=352800&r2=352801&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.cpp Thu Jan 31 13:57:51 2019
@@ -307,5 +307,16 @@ void AMDGPUTargetInfo::getTargetDefines(
 }
 
 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
+  assert(HalfFormat == Aux->HalfFormat);
+  assert(FloatFormat == Aux->FloatFormat);
+  assert(DoubleFormat == Aux->DoubleFormat);
+
+  // On x86_64 long double is 80-bit extended precision format, which is
+  // not supported by AMDGPU. 128-bit floating point format is also not
+  // supported by AMDGPU. Therefore keep its own format for these two types.
+  auto SaveLongDoubleFormat = LongDoubleFormat;
+  auto SaveFloat128Format = Float128Format;
   copyAuxTarget(Aux);
+  LongDoubleFormat = SaveLongDoubleFormat;
+  Float128Format = SaveFloat128Format;
 }

Added: cfe/trunk/test/CodeGenCUDA/types.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/types.cu?rev=352801&view=auto
==
--- cfe/trunk/test/CodeGenCUDA/types.cu (added)
+++ cfe/trunk/test/CodeGenCUDA/types.cu Thu Jan 31 13:57:51 2019
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple amdgcn -aux-triple x86_64 -fcuda-is-device 
-emit-llvm %s -o - | FileCheck -check-prefix=DEV %s
+// RUN: %clang_cc1 -triple x86_64 -aux-triple amdgcn -emit-llvm %s -o - | 
FileCheck -check-prefix=HOST %s
+
+#include "Inputs/cuda.h"
+
+// HOST: @ld_host = global x86_fp80 0xK
+long double ld_host;
+
+// DEV: @ld_device = addrspace(1) externally_initialized global double 
0.00e+00
+__device__ long double ld_device;


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r354004 - [CUDA][HIP] Use device side kernel and variable names when registering them

2019-02-13 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Feb 13 18:00:09 2019
New Revision: 354004

URL: http://llvm.org/viewvc/llvm-project?rev=354004&view=rev
Log:
[CUDA][HIP] Use device side kernel and variable names when registering them

__hipRegisterFunction and __hipRegisterVar need to accept device side kernel 
and variable names
so that HIP runtime can associate kernel stub functions in host code with 
kernel symbols in fat binaries,
and associate shadow variables in host code with device variables in fat 
binaries.

Currently, clang assumes kernel functions and device variables have the same 
name as the kernel
stub functions and shadow variables. However, when host is compiled in windows 
with MSVC C++
ABI and device is compiled with Itanium C++ ABI (e.g. AMDGPU), kernels and 
device symbols in fat
binary are mangled differently than host.

This patch gets the device side kernel and variable name by mangling them in 
the mangle context
of aux target.

Differential Revision: https://reviews.llvm.org/D58163

Modified:
cfe/trunk/include/clang/AST/ASTContext.h
cfe/trunk/lib/AST/ASTContext.cpp
cfe/trunk/lib/CodeGen/CGCUDANV.cpp
cfe/trunk/lib/CodeGen/CGCUDARuntime.h
cfe/trunk/lib/CodeGen/CodeGenModule.cpp
cfe/trunk/test/CodeGenCUDA/device-stub.cu

Modified: cfe/trunk/include/clang/AST/ASTContext.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/ASTContext.h?rev=354004&r1=354003&r2=354004&view=diff
==
--- cfe/trunk/include/clang/AST/ASTContext.h (original)
+++ cfe/trunk/include/clang/AST/ASTContext.h Wed Feb 13 18:00:09 2019
@@ -2237,7 +2237,8 @@ public:
 
   VTableContextBase *getVTableContext();
 
-  MangleContext *createMangleContext();
+  /// If \p T is null pointer, assume the target in ASTContext.
+  MangleContext *createMangleContext(const TargetInfo *T = nullptr);
 
   void DeepCollectObjCIvars(const ObjCInterfaceDecl *OI, bool leafClass,
 SmallVectorImpl &Ivars) const;

Modified: cfe/trunk/lib/AST/ASTContext.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ASTContext.cpp?rev=354004&r1=354003&r2=354004&view=diff
==
--- cfe/trunk/lib/AST/ASTContext.cpp (original)
+++ cfe/trunk/lib/AST/ASTContext.cpp Wed Feb 13 18:00:09 2019
@@ -9981,8 +9981,10 @@ VTableContextBase *ASTContext::getVTable
   return VTContext.get();
 }
 
-MangleContext *ASTContext::createMangleContext() {
-  switch (Target->getCXXABI().getKind()) {
+MangleContext *ASTContext::createMangleContext(const TargetInfo *T) {
+  if (!T)
+T = Target;
+  switch (T->getCXXABI().getKind()) {
   case TargetCXXABI::GenericAArch64:
   case TargetCXXABI::GenericItanium:
   case TargetCXXABI::GenericARM:

Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=354004&r1=354003&r2=354004&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Wed Feb 13 18:00:09 2019
@@ -42,14 +42,25 @@ private:
   /// Convenience reference to the current module
   llvm::Module &TheModule;
   /// Keeps track of kernel launch stubs emitted in this module
-  llvm::SmallVector EmittedKernels;
-  llvm::SmallVector, 16> 
DeviceVars;
+  struct KernelInfo {
+llvm::Function *Kernel;
+const Decl *D;
+  };
+  llvm::SmallVector EmittedKernels;
+  struct VarInfo {
+llvm::GlobalVariable *Var;
+const VarDecl *D;
+unsigned Flag;
+  };
+  llvm::SmallVector DeviceVars;
   /// Keeps track of variable containing handle of GPU binary. Populated by
   /// ModuleCtorFunction() and used to create corresponding cleanup calls in
   /// ModuleDtorFunction()
   llvm::GlobalVariable *GpuBinaryHandle = nullptr;
   /// Whether we generate relocatable device code.
   bool RelocatableDeviceCode;
+  /// Mangle context for device.
+  std::unique_ptr DeviceMC;
 
   llvm::FunctionCallee getSetupArgumentFn() const;
   llvm::FunctionCallee getLaunchFn() const;
@@ -106,13 +117,15 @@ private:
 
   void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args);
   void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
+  std::string getDeviceSideName(const Decl *ND);
 
 public:
   CGNVCUDARuntime(CodeGenModule &CGM);
 
   void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
-  void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) override {
-DeviceVars.push_back(std::make_pair(&Var, Flags));
+  void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
+ unsigned Flags) override {
+DeviceVars.push_back({&Var, VD, Flags});
   }
 
   /// Creates module constructor function
@@ -138,7 +151,9 @@ CGNVCUDARuntime::addUnderscoredPrefixToN
 CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)

r354615 - [HIP] change kernel stub name

2019-02-21 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu Feb 21 12:12:16 2019
New Revision: 354615

URL: http://llvm.org/viewvc/llvm-project?rev=354615&view=rev
Log:
[HIP] change kernel stub name

Add .stub to the kernel stub function name so that it is different from the kernel
name in device code. This is necessary to let the debugger find the correct symbol
for the kernel.

Differential Revision: https://reviews.llvm.org/D58518

Modified:
cfe/trunk/lib/CodeGen/CGCUDANV.cpp
cfe/trunk/test/CodeGenCUDA/device-stub.cu

Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=354615&r1=354614&r2=354615&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Thu Feb 21 12:12:16 2019
@@ -227,6 +227,12 @@ void CGNVCUDARuntime::emitDeviceStub(Cod
 emitDeviceStubBodyNew(CGF, Args);
   else
 emitDeviceStubBodyLegacy(CGF, Args);
+
+  // Postfix kernel stub names with .stub to differentiate them from kernel
+  // names in device binaries. This is to facilitate the debugger to find
+  // the correct symbols for kernels in the device binary.
+  if (CGF.getLangOpts().HIP)
+CGF.CurFn->setName(CGF.CurFn->getName() + ".stub");
 }
 
 // CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local

Modified: cfe/trunk/test/CodeGenCUDA/device-stub.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-stub.cu?rev=354615&r1=354614&r2=354615&view=diff
==
--- cfe/trunk/test/CodeGenCUDA/device-stub.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/device-stub.cu Thu Feb 21 12:12:16 2019
@@ -145,7 +145,8 @@ void use_pointers() {
 // Test that we build the correct number of calls to cudaSetupArgument followed
 // by a call to cudaLaunch.
 
-// LNX: define{{.*}}kernelfunc
+// CUDA-LABEL: define{{.*}}kernelfunc
+// HIP-LABEL: define{{.*}}@_Z10kernelfunciii.stub
 
 // New launch sequence stores arguments into local buffer and passes array of
 // pointers to them directly to cudaLaunchKernel


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r354651 - revert r354615: [HIP] change kernel stub name

2019-02-21 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu Feb 21 20:20:12 2019
New Revision: 354651

URL: http://llvm.org/viewvc/llvm-project?rev=354651&view=rev
Log:
revert r354615: [HIP] change kernel stub name

It caused regressions.

Differential Revision: https://reviews.llvm.org/D58518

Modified:
cfe/trunk/lib/CodeGen/CGCUDANV.cpp
cfe/trunk/test/CodeGenCUDA/device-stub.cu

Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=354651&r1=354650&r2=354651&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Thu Feb 21 20:20:12 2019
@@ -227,12 +227,6 @@ void CGNVCUDARuntime::emitDeviceStub(Cod
 emitDeviceStubBodyNew(CGF, Args);
   else
 emitDeviceStubBodyLegacy(CGF, Args);
-
-  // Postfix kernel stub names with .stub to differentiate them from kernel
-  // names in device binaries. This is to facilitate the debugger to find
-  // the correct symbols for kernels in the device binary.
-  if (CGF.getLangOpts().HIP)
-CGF.CurFn->setName(CGF.CurFn->getName() + ".stub");
 }
 
 // CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local

Modified: cfe/trunk/test/CodeGenCUDA/device-stub.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/device-stub.cu?rev=354651&r1=354650&r2=354651&view=diff
==
--- cfe/trunk/test/CodeGenCUDA/device-stub.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/device-stub.cu Thu Feb 21 20:20:12 2019
@@ -145,8 +145,7 @@ void use_pointers() {
 // Test that we build the correct number of calls to cudaSetupArgument followed
 // by a call to cudaLaunch.
 
-// CUDA-LABEL: define{{.*}}kernelfunc
-// HIP-LABEL: define{{.*}}@_Z10kernelfunciii.stub
+// LNX: define{{.*}}kernelfunc
 
 // New launch sequence stores arguments into local buffer and passes array of
 // pointers to them directly to cudaLaunchKernel


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r312441 - [OpenCL] Do not use vararg in emitted functions for enqueue_kernel

2017-09-03 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Sun Sep  3 06:52:24 2017
New Revision: 312441

URL: http://llvm.org/viewvc/llvm-project?rev=312441&view=rev
Log:
[OpenCL] Do not use vararg in emitted functions for enqueue_kernel

Not all targets support vararg (e.g. amdgpu). Instead of using vararg in the 
emitted functions for enqueue_kernel,
this patch creates a temporary array of size_t, stores the size arguments in 
the temporary array
and passes it to the emitted functions for enqueue_kernel.

Differential Revision: https://reviews.llvm.org/D36678

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGenOpenCL/cl20-device-side-enqueue.cl

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=312441&r1=312440&r2=312441&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sun Sep  3 06:52:24 2017
@@ -2601,27 +2601,50 @@ RValue CodeGenFunction::EmitBuiltinExpr(
 }
 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
 
+// Create a temporary array to hold the sizes of local pointer arguments
+// for the block. \p First is the position of the first size argument.
+auto CreateArrayForSizeVar = [=](unsigned First) {
+  auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
+  auto *Arr = Builder.CreateAlloca(AT);
+  llvm::Value *Ptr;
+  // Each of the following arguments specifies the size of the 
corresponding
+  // argument passed to the enqueued block.
+  auto *Zero = llvm::ConstantInt::get(IntTy, 0);
+  for (unsigned I = First; I < NumArgs; ++I) {
+auto *Index = llvm::ConstantInt::get(IntTy, I - First);
+auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
+if (I == First)
+  Ptr = GEP;
+auto *V =
+Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
+Builder.CreateAlignedStore(
+V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
+  }
+  return Ptr;
+};
+
 // Could have events and/or vaargs.
 if (E->getArg(3)->getType()->isBlockPointerType()) {
   // No events passed, but has variadic arguments.
   Name = "__enqueue_kernel_vaargs";
-  llvm::Value *Block = Builder.CreatePointerCast(
-  EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
+  auto *Block = Builder.CreatePointerCast(EmitScalarExpr(E->getArg(3)),
+  GenericVoidPtrTy);
+  auto *PtrToSizeArray = CreateArrayForSizeVar(4);
+
   // Create a vector of the arguments, as well as a constant value to
   // express to the runtime the number of variadic arguments.
-  std::vector Args = {Queue, Flags, Range, Block,
- ConstantInt::get(IntTy, NumArgs - 4)};
-  std::vector ArgTys = {QueueTy, IntTy, RangeTy,
-  GenericVoidPtrTy, IntTy};
-
-  // Each of the following arguments specifies the size of the 
corresponding
-  // argument passed to the enqueued block.
-  for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I)
-Args.push_back(
-Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
+  std::vector Args = {Queue,
+ Flags,
+ Range,
+ Block,
+ ConstantInt::get(IntTy, NumArgs - 4),
+ PtrToSizeArray};
+  std::vector ArgTys = {QueueTy, IntTy,
+  RangeTy, GenericVoidPtrTy,
+  IntTy,   PtrToSizeArray->getType()};
 
   llvm::FunctionType *FTy = llvm::FunctionType::get(
-  Int32Ty, llvm::ArrayRef(ArgTys), true);
+  Int32Ty, llvm::ArrayRef(ArgTys), false);
   return RValue::get(
   Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
  llvm::ArrayRef(Args)));
@@ -2667,14 +2690,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(
   ArgTys.push_back(Int32Ty);
   Name = "__enqueue_kernel_events_vaargs";
 
-  // Each of the following arguments specifies the size of the 
corresponding
-  // argument passed to the enqueued block.
-  for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I)
-Args.push_back(
-Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
+  auto *PtrToSizeArray = CreateArrayForSizeVar(7);
+  Args.push_back(PtrToSizeArray);
+  ArgTys.push_back(PtrToSizeArray->getType());
 
   llvm::FunctionType *FTy = llvm::FunctionType::get(
-  Int32Ty, llvm::ArrayRef(ArgTys), true);
+  Int32Ty, llvm::ArrayRef(ArgTys), false);
   return RValue::ge

r313171 - [AMDGPU] Change addr space of clk_event_t, queue_t and reserve_id_t to global

2017-09-13 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Sep 13 11:50:42 2017
New Revision: 313171

URL: http://llvm.org/viewvc/llvm-project?rev=313171&view=rev
Log:
[AMDGPU] Change addr space of clk_event_t, queue_t and reserve_id_t to global

Differential Revision: https://reviews.llvm.org/D37703

Modified:
cfe/trunk/lib/Basic/Targets/AMDGPU.h
cfe/trunk/test/CodeGenOpenCL/opencl_types.cl

Modified: cfe/trunk/lib/Basic/Targets/AMDGPU.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/AMDGPU.h?rev=313171&r1=313170&r2=313171&view=diff
==
--- cfe/trunk/lib/Basic/Targets/AMDGPU.h (original)
+++ cfe/trunk/lib/Basic/Targets/AMDGPU.h Wed Sep 13 11:50:42 2017
@@ -202,6 +202,10 @@ public:
   case BuiltinType::Id:
\
 return LangAS::opencl_constant;
 #include "clang/Basic/OpenCLImageTypes.def"
+case BuiltinType::OCLClkEvent:
+case BuiltinType::OCLQueue:
+case BuiltinType::OCLReserveID:
+  return LangAS::opencl_global;
 
 default:
   return TargetInfo::getOpenCLTypeAddrSpace(T);

Modified: cfe/trunk/test/CodeGenOpenCL/opencl_types.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/opencl_types.cl?rev=313171&r1=313170&r2=313171&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/opencl_types.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/opencl_types.cl Wed Sep 13 11:50:42 2017
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -cl-std=CL2.0 %s -triple "spir-unknown-unknown" -emit-llvm 
-o - -O0 | FileCheck %s --check-prefix=CHECK-SPIR
-// RUN: %clang_cc1 -cl-std=CL2.0 %s -triple "amdgcn--amdhsa" -emit-llvm -o - 
-O0 | FileCheck %s --check-prefix=CHECK-AMDGCN
+// RUN: %clang_cc1 -cl-std=CL2.0 %s -triple "spir-unknown-unknown" -emit-llvm 
-o - -O0 | FileCheck %s --check-prefixes=CHECK-COM,CHECK-SPIR
+// RUN: %clang_cc1 -cl-std=CL2.0 %s -triple "amdgcn--amdhsa" -emit-llvm -o - 
-O0 | FileCheck %s --check-prefixes=CHECK-COM,CHECK-AMDGCN
 
 #define CLK_ADDRESS_CLAMP_TO_EDGE   2
 #define CLK_NORMALIZED_COORDS_TRUE  1
@@ -7,7 +7,7 @@
 #define CLK_FILTER_LINEAR   0x20
 
 constant sampler_t glb_smp = 
CLK_ADDRESS_CLAMP_TO_EDGE|CLK_NORMALIZED_COORDS_TRUE|CLK_FILTER_NEAREST;
-// CHECK-SPIR-NOT: constant i32
+// CHECK-COM-NOT: constant i32
 
 void fnc1(image1d_t img) {}
 // CHECK-SPIR: @fnc1(%opencl.image1d_ro_t addrspace(1)*
@@ -39,20 +39,23 @@ void fnc4smp(sampler_t s) {}
 
 kernel void foo(image1d_t img) {
   sampler_t smp = 
CLK_ADDRESS_CLAMP_TO_EDGE|CLK_NORMALIZED_COORDS_TRUE|CLK_FILTER_LINEAR;
-  // CHECK-SPIR: alloca %opencl.sampler_t addrspace(2)*
+  // CHECK-COM: alloca %opencl.sampler_t addrspace(2)*
   event_t evt;
-  // CHECK-SPIR: alloca %opencl.event_t*
+  // CHECK-COM: alloca %opencl.event_t*
   clk_event_t clk_evt;
   // CHECK-SPIR: alloca %opencl.clk_event_t*
+  // CHECK-AMDGCN: alloca %opencl.clk_event_t addrspace(1)*
   queue_t queue;
   // CHECK-SPIR: alloca %opencl.queue_t*
+  // CHECK-AMDGCN: alloca %opencl.queue_t addrspace(1)*
   reserve_id_t rid;
   // CHECK-SPIR: alloca %opencl.reserve_id_t*
-  // CHECK-SPIR: store %opencl.sampler_t addrspace(2)*
+  // CHECK-AMDGCN: alloca %opencl.reserve_id_t addrspace(1)*
+  // CHECK-COM: store %opencl.sampler_t addrspace(2)*
   fnc4smp(smp);
-  // CHECK-SPIR: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
+  // CHECK-COM: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
   fnc4smp(glb_smp);
-  // CHECK-SPIR: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
+  // CHECK-COM: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
 }
 
 kernel void foo_pipe(read_only pipe int p) {}


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r313172 - Add more tests for OpenCL atomic builtin functions

2017-09-13 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Sep 13 11:56:25 2017
New Revision: 313172

URL: http://llvm.org/viewvc/llvm-project?rev=313172&view=rev
Log:
Add more tests for OpenCL atomic builtin functions

Add tests for different address spaces and insert some blank lines to make them 
more readable.

Differential Revision: https://reviews.llvm.org/D37742

Modified:
cfe/trunk/test/CodeGenOpenCL/atomic-ops-libcall.cl
cfe/trunk/test/CodeGenOpenCL/atomic-ops.cl

Modified: cfe/trunk/test/CodeGenOpenCL/atomic-ops-libcall.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/atomic-ops-libcall.cl?rev=313172&r1=313171&r2=313172&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/atomic-ops-libcall.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/atomic-ops-libcall.cl Wed Sep 13 11:56:25 2017
@@ -18,39 +18,64 @@ typedef enum memory_scope {
 #endif
 } memory_scope;
 
-void f(atomic_int *i, atomic_uint *ui, int cmp, int order, int scope) {
+void f(atomic_int *i, global atomic_int *gi, local atomic_int *li, private 
atomic_int *pi, atomic_uint *ui, int cmp, int order, int scope) {
   int x;
   // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8 addrspace(4)* 
{{%[0-9]+}}, i32 5, i32 1)
   // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(i8* {{%[0-9]+}}, i32 
5, i32 1)
   x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
+
   // SPIR: call void @__opencl_atomic_store_4(i8 addrspace(4)* {{%[0-9]+}}, 
i32 {{%[0-9]+}}, i32 5, i32 1)
   // ARM: call void @__opencl_atomic_store_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, 
i32 5, i32 1)
   __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
+
+  // SPIR: %[[GP:[0-9]+]] = addrspacecast i8 addrspace(1)* {{%[0-9]+}} to i8 
addrspace(4)*
+  // SPIR: call void @__opencl_atomic_store_4(i8 addrspace(4)* %[[GP]], i32 
{{%[0-9]+}}, i32 5, i32 1)
+  // ARM: call void @__opencl_atomic_store_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, 
i32 5, i32 1)
+  __opencl_atomic_store(gi, 1, memory_order_seq_cst, memory_scope_work_group);
+
+  // SPIR: %[[GP:[0-9]+]] = addrspacecast i8 addrspace(3)* {{%[0-9]+}} to i8 
addrspace(4)*
+  // SPIR: call void @__opencl_atomic_store_4(i8 addrspace(4)* %[[GP]], i32 
{{%[0-9]+}}, i32 5, i32 1)
+  // ARM: call void @__opencl_atomic_store_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, 
i32 5, i32 1)
+  __opencl_atomic_store(li, 1, memory_order_seq_cst, memory_scope_work_group);
+
+  // SPIR: %[[GP:[0-9]+]] = addrspacecast i8* {{%[0-9]+}} to i8 addrspace(4)*
+  // SPIR: call void @__opencl_atomic_store_4(i8 addrspace(4)* %[[GP]], i32 
{{%[0-9]+}}, i32 5, i32 1)
+  // ARM: call void @__opencl_atomic_store_4(i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, 
i32 5, i32 1)
+  __opencl_atomic_store(pi, 1, memory_order_seq_cst, memory_scope_work_group);
+
   // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(i8 addrspace(4)* 
{{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
   // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(i8* {{%[0-9]+}}, 
i32 {{%[0-9]+}}, i32 5, i32 1)
   x = __opencl_atomic_fetch_add(i, 3, memory_order_seq_cst, 
memory_scope_work_group);
+
   // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_min_4(i8 addrspace(4)* 
{{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
   // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_min_4(i8* {{%[0-9]+}}, 
i32 {{%[0-9]+}}, i32 5, i32 1)
   x = __opencl_atomic_fetch_min(i, 3, memory_order_seq_cst, 
memory_scope_work_group);
+
   // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_umin_4(i8 
addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 1)
   // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_umin_4(i8* {{%[0-9]+}}, 
i32 {{%[0-9]+}}, i32 5, i32 1)
   x = __opencl_atomic_fetch_min(ui, 3, memory_order_seq_cst, 
memory_scope_work_group);
+
   // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 
addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 
5, i32 5, i32 1)
   // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* 
{{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
   x = __opencl_atomic_compare_exchange_strong(i, &cmp, 1, 
memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
+
   // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 
addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 
5, i32 5, i32 1)
   // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* 
{{%[0-9]+}}, i8* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 5, i32 5, i32 1)
   x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, 
memory_order_seq_cst, memory_scope_work_group);
+
   // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8 
addrspace(4)* {{%[0-9]+}}, i8 addrspace(4)* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 
5, i32 5, i32 2)
   // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(i8* 
{{%[

r363076 - Revert r344630 Disable code object version 3 for HIP toolchain.

2019-06-11 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Jun 11 08:05:11 2019
New Revision: 363076

URL: http://llvm.org/viewvc/llvm-project?rev=363076&view=rev
Log:
Revert r344630 Disable code object version 3 for HIP toolchain.

Remove the workaround so that by default code object v3 is enabled.

Modified:
cfe/trunk/lib/Driver/ToolChains/HIP.cpp

Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/HIP.cpp?rev=363076&r1=363075&r2=363076&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/HIP.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/HIP.cpp Tue Jun 11 08:05:11 2019
@@ -127,7 +127,7 @@ const char *AMDGCN::Linker::constructLlc
 llvm::StringRef OutputFilePrefix, const char *InputFileName) const {
   // Construct llc command.
   ArgStringList LlcArgs{InputFileName, "-mtriple=amdgcn-amd-amdhsa",
-"-filetype=obj", "-mattr=-code-object-v3",
+"-filetype=obj",
 Args.MakeArgString("-mcpu=" + SubArchName)};
 
   // Extract all the -m options


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r363414 - [AMDGPU] Enable the implicit arguments for HIP (CLANG)

2019-06-14 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri Jun 14 08:54:47 2019
New Revision: 363414

URL: http://llvm.org/viewvc/llvm-project?rev=363414&view=rev
Log:
[AMDGPU] Enable the implicit arguments for HIP (CLANG)

Enable 48 bytes of implicit arguments for HIP as well; previously this was
enabled only for OpenCL. This code is specific to the AMDGPU target.

Differential Revision: https://reviews.llvm.org/D62244

Added:
cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu
Modified:
cfe/trunk/lib/CodeGen/TargetInfo.cpp

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=363414&r1=363413&r2=363414&view=diff
==
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Fri Jun 14 08:54:47 2019
@@ -7868,7 +7868,8 @@ void AMDGPUTargetCodeGenInfo::setTargetA
   const auto *ReqdWGS = M.getLangOpts().OpenCL ?
 FD->getAttr() : nullptr;
 
-  if (M.getLangOpts().OpenCL && FD->hasAttr() &&
+  if (((M.getLangOpts().OpenCL && FD->hasAttr()) ||
+  (M.getLangOpts().HIP && FD->hasAttr())) &&
   (M.getTriple().getOS() == llvm::Triple::AMDHSA))
 F->addFnAttr("amdgpu-implicitarg-num-bytes", "48");
 

Added: cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu?rev=363414&view=auto
==
--- cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu (added)
+++ cfe/trunk/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu Fri Jun 14 
08:54:47 2019
@@ -0,0 +1,8 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x 
hip -o - %s | FileCheck %s
+#include "Inputs/cuda.h"
+
+__global__ void hip_kernel_temp() {
+}
+
+// CHECK: attributes {{.*}} = {{.*}} "amdgpu-implicitarg-num-bytes"="48"


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r364380 - Fix build failure due to missing break

2019-06-25 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Jun 25 20:33:03 2019
New Revision: 364380

URL: http://llvm.org/viewvc/llvm-project?rev=364380&view=rev
Log:
Fix build failure due to missing break

Modified:
cfe/trunk/lib/Basic/Targets/ARM.cpp

Modified: cfe/trunk/lib/Basic/Targets/ARM.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/ARM.cpp?rev=364380&r1=364379&r2=364380&view=diff
==
--- cfe/trunk/lib/Basic/Targets/ARM.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/ARM.cpp Tue Jun 25 20:33:03 2019
@@ -910,6 +910,7 @@ bool ARMTargetInfo::validateAsmConstrain
   Name++;
   return true;
 }
+break;
   case 'U': // a memory reference...
 switch (Name[1]) {
 case 'q': // ...ARMV4 ldrsb
@@ -925,6 +926,7 @@ bool ARMTargetInfo::validateAsmConstrain
   Name++;
   return true;
 }
+break;
   }
   return false;
 }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r364381 - [HIP] Support attribute hip_pinned_shadow

2019-06-25 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Jun 25 20:47:37 2019
New Revision: 364381

URL: http://llvm.org/viewvc/llvm-project?rev=364381&view=rev
Log:
[HIP] Support attribute hip_pinned_shadow

This patch introduces support of hip_pinned_shadow variable for HIP.

A hip_pinned_shadow variable is a global variable with the attribute
hip_pinned_shadow.
It has external linkage on the device side and has no initializer. It has
internal linkage on the host side and has an initializer or static constructor.
It can be accessed in both device code and host code.

This allows HIP runtime to implement support of HIP texture reference.

Differential Revision: https://reviews.llvm.org/D62738

Added:
cfe/trunk/test/AST/ast-dump-hip-pinned-shadow.cu
cfe/trunk/test/CodeGenCUDA/hip-pinned-shadow.cu
cfe/trunk/test/SemaCUDA/hip-pinned-shadow.cu
Modified:
cfe/trunk/include/clang/Basic/Attr.td
cfe/trunk/include/clang/Basic/AttrDocs.td
cfe/trunk/lib/CodeGen/CodeGenModule.cpp
cfe/trunk/lib/CodeGen/TargetInfo.cpp
cfe/trunk/lib/Driver/ToolChains/HIP.cpp
cfe/trunk/lib/Sema/SemaDeclAttr.cpp
cfe/trunk/test/Driver/hip-toolchain-no-rdc.hip
cfe/trunk/test/Driver/hip-toolchain-rdc.hip
cfe/trunk/test/Misc/pragma-attribute-supported-attributes-list.test

Modified: cfe/trunk/include/clang/Basic/Attr.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Attr.td?rev=364381&r1=364380&r2=364381&view=diff
==
--- cfe/trunk/include/clang/Basic/Attr.td (original)
+++ cfe/trunk/include/clang/Basic/Attr.td Tue Jun 25 20:47:37 2019
@@ -295,6 +295,7 @@ class LangOpt;
 def Borland : LangOpt<"Borland">;
 def CUDA : LangOpt<"CUDA">;
+def HIP : LangOpt<"HIP">;
 def COnly : LangOpt<"COnly", "!LangOpts.CPlusPlus">;
 def CPlusPlus : LangOpt<"CPlusPlus">;
 def OpenCL : LangOpt<"OpenCL">;
@@ -957,6 +958,13 @@ def CUDADevice : InheritableAttr {
   let Documentation = [Undocumented];
 }
 
+def HIPPinnedShadow : InheritableAttr {
+  let Spellings = [GNU<"hip_pinned_shadow">, 
Declspec<"__hip_pinned_shadow__">];
+  let Subjects = SubjectList<[Var]>;
+  let LangOpts = [HIP];
+  let Documentation = [HIPPinnedShadowDocs];
+}
+
 def CUDADeviceBuiltin : IgnoredAttr {
   let Spellings = [GNU<"device_builtin">, Declspec<"__device_builtin__">];
   let LangOpts = [CUDA];

Modified: cfe/trunk/include/clang/Basic/AttrDocs.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/AttrDocs.td?rev=364381&r1=364380&r2=364381&view=diff
==
--- cfe/trunk/include/clang/Basic/AttrDocs.td (original)
+++ cfe/trunk/include/clang/Basic/AttrDocs.td Tue Jun 25 20:47:37 2019
@@ -4183,3 +4183,15 @@ This attribute does not affect optimizat
 ``__attribute__((malloc))``.
 }];
 }
+
+def HIPPinnedShadowDocs : Documentation {
+  let Category = DocCatType;
+  let Content = [{
+The GNU style attribute __attribute__((hip_pinned_shadow)) or MSVC style 
attribute
+__declspec(hip_pinned_shadow) can be added to the definition of a global 
variable
+to indicate it is a HIP pinned shadow variable. A HIP pinned shadow variable 
can
+be accessed on both device side and host side. It has external linkage and is
+not initialized on device side. It has internal linkage and is initialized by
+the initializer on host side.
+  }];
+}
\ No newline at end of file

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=364381&r1=364380&r2=364381&view=diff
==
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Tue Jun 25 20:47:37 2019
@@ -2415,7 +2415,8 @@ void CodeGenModule::EmitGlobal(GlobalDec
   if (!Global->hasAttr() &&
   !Global->hasAttr() &&
   !Global->hasAttr() &&
-  !Global->hasAttr())
+  !Global->hasAttr() &&
+  !(LangOpts.HIP && Global->hasAttr()))
 return;
 } else {
   // We need to emit host-side 'shadows' for all global
@@ -3781,7 +3782,12 @@ void CodeGenModule::EmitGlobalVarDefinit
   !getLangOpts().CUDAIsDevice &&
   (D->hasAttr() || D->hasAttr() ||
D->hasAttr());
-  if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar))
+  // HIP pinned shadow of initialized host-side global variables are also
+  // left undefined.
+  bool IsHIPPinnedShadowVar =
+  getLangOpts().CUDAIsDevice && D->hasAttr();
+  if (getLangOpts().CUDA &&
+  (IsCUDASharedVar || IsCUDAShadowVar || IsHIPPinnedShadowVar))
 Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
   else if (!InitExpr) {
 // This is a tentative definition; tentative definitions are
@@ -3892,7 +3898,8 @@ void CodeGenModule::EmitGlobalVarDefinit
   // global variables become internal definitions. These have to
   // be internal in order to pre

r365799 - [HIP] Add GPU arch gfx1010, gfx1011, and gfx1012

2019-07-11 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu Jul 11 10:50:09 2019
New Revision: 365799

URL: http://llvm.org/viewvc/llvm-project?rev=365799&view=rev
Log:
[HIP] Add GPU arch gfx1010, gfx1011, and gfx1012

Differential Revision: https://reviews.llvm.org/D64364

Modified:
cfe/trunk/include/clang/Basic/Cuda.h
cfe/trunk/lib/Basic/Cuda.cpp
cfe/trunk/lib/Basic/Targets/NVPTX.cpp
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp

Modified: cfe/trunk/include/clang/Basic/Cuda.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Cuda.h?rev=365799&r1=365798&r2=365799&view=diff
==
--- cfe/trunk/include/clang/Basic/Cuda.h (original)
+++ cfe/trunk/include/clang/Basic/Cuda.h Thu Jul 11 10:50:09 2019
@@ -66,6 +66,9 @@ enum class CudaArch {
   GFX906,
   GFX908,
   GFX909,
+  GFX1010,
+  GFX1011,
+  GFX1012,
   LAST,
 };
 const char *CudaArchToString(CudaArch A);

Modified: cfe/trunk/lib/Basic/Cuda.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Cuda.cpp?rev=365799&r1=365798&r2=365799&view=diff
==
--- cfe/trunk/lib/Basic/Cuda.cpp (original)
+++ cfe/trunk/lib/Basic/Cuda.cpp Thu Jul 11 10:50:09 2019
@@ -113,6 +113,12 @@ const char *CudaArchToString(CudaArch A)
 return "gfx908";
   case CudaArch::GFX909: // TBA
 return "gfx909";
+  case CudaArch::GFX1010: // TBA
+return "gfx1010";
+  case CudaArch::GFX1011: // TBA
+return "gfx1011";
+  case CudaArch::GFX1012: // TBA
+return "gfx1012";
   }
   llvm_unreachable("invalid enum");
 }
@@ -151,6 +157,9 @@ CudaArch StringToCudaArch(llvm::StringRe
   .Case("gfx906", CudaArch::GFX906)
   .Case("gfx908", CudaArch::GFX908)
   .Case("gfx909", CudaArch::GFX909)
+  .Case("gfx1010", CudaArch::GFX1010)
+  .Case("gfx1011", CudaArch::GFX1011)
+  .Case("gfx1012", CudaArch::GFX1012)
   .Default(CudaArch::UNKNOWN);
 }
 
@@ -264,6 +273,9 @@ CudaVirtualArch VirtualArchForCudaArch(C
   case CudaArch::GFX906:
   case CudaArch::GFX908:
   case CudaArch::GFX909:
+  case CudaArch::GFX1010:
+  case CudaArch::GFX1011:
+  case CudaArch::GFX1012:
 return CudaVirtualArch::COMPUTE_AMDGCN;
   }
   llvm_unreachable("invalid enum");
@@ -312,6 +324,9 @@ CudaVersion MinVersionForCudaArch(CudaAr
   case CudaArch::GFX906:
   case CudaArch::GFX908:
   case CudaArch::GFX909:
+  case CudaArch::GFX1010:
+  case CudaArch::GFX1011:
+  case CudaArch::GFX1012:
 return CudaVersion::CUDA_70;
   }
   llvm_unreachable("invalid enum");
@@ -336,6 +351,9 @@ CudaVersion MaxVersionForCudaArch(CudaAr
   case CudaArch::GFX810:
   case CudaArch::GFX900:
   case CudaArch::GFX902:
+  case CudaArch::GFX1010:
+  case CudaArch::GFX1011:
+  case CudaArch::GFX1012:
 return CudaVersion::CUDA_80;
   default:
 return CudaVersion::LATEST;

Modified: cfe/trunk/lib/Basic/Targets/NVPTX.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/NVPTX.cpp?rev=365799&r1=365798&r2=365799&view=diff
==
--- cfe/trunk/lib/Basic/Targets/NVPTX.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/NVPTX.cpp Thu Jul 11 10:50:09 2019
@@ -193,6 +193,9 @@ void NVPTXTargetInfo::getTargetDefines(c
   case CudaArch::GFX906:
   case CudaArch::GFX908:
   case CudaArch::GFX909:
+  case CudaArch::GFX1010:
+  case CudaArch::GFX1011:
+  case CudaArch::GFX1012:
   case CudaArch::LAST:
 break;
   case CudaArch::UNKNOWN:

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=365799&r1=365798&r2=365799&view=diff
==
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Thu Jul 11 10:50:09 2019
@@ -4930,6 +4930,9 @@ void CGOpenMPRuntimeNVPTX::checkArchForU
   case CudaArch::GFX906:
   case CudaArch::GFX908:
   case CudaArch::GFX909:
+  case CudaArch::GFX1010:
+  case CudaArch::GFX1011:
+  case CudaArch::GFX1012:
   case CudaArch::UNKNOWN:
 break;
   case CudaArch::LAST:
@@ -4985,6 +4988,9 @@ static std::pair get
   case CudaArch::GFX906:
   case CudaArch::GFX908:
   case CudaArch::GFX909:
+  case CudaArch::GFX1010:
+  case CudaArch::GFX1011:
+  case CudaArch::GFX1012:
   case CudaArch::UNKNOWN:
 break;
   case CudaArch::LAST:


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r361757 - [OpenCL] Fix file-scope const sampler variable for 2.0

2019-05-27 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Mon May 27 04:19:07 2019
New Revision: 361757

URL: http://llvm.org/viewvc/llvm-project?rev=361757&view=rev
Log:
[OpenCL] Fix file-scope const sampler variable for 2.0

OpenCL spec v2.0 s6.13.14:

Samplers can also be declared as global constants in the program
source using the following syntax.

   const sampler_t <sampler name> = <value>

This works fine for OpenCL 1.2 but fails for 2.0, because clang deduces
address space of file-scope const sampler variable to be in global address
space whereas spec v2.0 s6.9.b forbids file-scope sampler variable to be
in global address space.

The fix is not to deduce address space for file-scope sampler variables.

Differential Revision: https://reviews.llvm.org/D62197

Modified:
cfe/trunk/lib/Sema/SemaType.cpp
cfe/trunk/test/CodeGenOpenCL/sampler.cl
cfe/trunk/test/SemaOpenCL/sampler_t.cl

Modified: cfe/trunk/lib/Sema/SemaType.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaType.cpp?rev=361757&r1=361756&r2=361757&view=diff
==
--- cfe/trunk/lib/Sema/SemaType.cpp (original)
+++ cfe/trunk/lib/Sema/SemaType.cpp Mon May 27 04:19:07 2019
@@ -7363,7 +7363,21 @@ static void deduceOpenCLImplicitAddrSpac
   T->isDependentType() ||
   // Do not deduce addr space of decltype because it will be taken from
   // its argument.
-  T->isDecltypeType())
+  T->isDecltypeType() ||
+  // OpenCL spec v2.0 s6.9.b:
+  // The sampler type cannot be used with the __local and __global address
+  // space qualifiers.
+  // OpenCL spec v2.0 s6.13.14:
+  // Samplers can also be declared as global constants in the program
+  // source using the following syntax.
+  //   const sampler_t  = 
+  // In codegen, file-scope sampler type variable has special handing and
+  // does not rely on address space qualifier. On the other hand, deducing
+  // address space of const sampler file-scope variable as global address
+  // space causes spurious diagnostic about __global address space
+  // qualifier, therefore do not deduce address space of file-scope sampler
+  // type variable.
+  (D.getContext() == DeclaratorContext::FileContext && T->isSamplerT()))
 return;
 
   LangAS ImpAddr = LangAS::Default;

Modified: cfe/trunk/test/CodeGenOpenCL/sampler.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/sampler.cl?rev=361757&r1=361756&r2=361757&view=diff
==
--- cfe/trunk/test/CodeGenOpenCL/sampler.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/sampler.cl Mon May 27 04:19:07 2019
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 %s -emit-llvm -triple spir-unknown-unknown -o - -O0 | 
FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -triple spir-unknown-unknown -o 
- -O0 | FileCheck %s
 //
 // This test covers 5 cases of sampler initialzation:
 //   1. function argument passing
@@ -6,8 +7,9 @@
 //  1b. argument is a function-scope variable
 //  1c. argument is one of caller function's parameters
 //   2. variable initialization
-//  2a. initializing a file-scope variable
+//  2a. initializing a file-scope variable with constant addr space 
qualifier
 //  2b. initializing a function-scope variable
+//  2c. initializing a file-scope variable with const qualifier
 
 #define CLK_ADDRESS_CLAMP_TO_EDGE   2
 #define CLK_NORMALIZED_COORDS_TRUE  1
@@ -20,6 +22,10 @@
 constant sampler_t glb_smp = CLK_ADDRESS_CLAMP_TO_EDGE | 
CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
 // CHECK-NOT: glb_smp
 
+// Case 2c
+const sampler_t glb_smp_const = CLK_ADDRESS_CLAMP_TO_EDGE | 
CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR;
+// CHECK-NOT: glb_smp_const
+
 int get_sampler_initializer(void);
 
 void fnc4smp(sampler_t s) {}
@@ -47,11 +53,16 @@ kernel void foo(sampler_t smp_par) {
   // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, 
%opencl.sampler_t addrspace(2)** [[smp_ptr]]
   // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* 
[[SAMP]])
 
-  // Case 1a
+  // Case 1a/2a
   fnc4smp(glb_smp);
   // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* 
@__translate_sampler_initializer(i32 35)
   // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* 
[[SAMP]])
 
+  // Case 1a/2c
+  fnc4smp(glb_smp_const);
+  // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* 
@__translate_sampler_initializer(i32 35)
+  // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* 
[[SAMP]])
+
   // Case 1c
   fnc4smp(smp_par);
   // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, 
%opencl.sampler_t addrspace(2)** [[smp_par_ptr]]

Modified: cfe/trunk/test/SemaOpenCL/sampler_t.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/sampler_t.cl?rev=361757&r1=361756&r2=361757&view=diff
=

r361880 - [CUDA][HIP] Emit dependent libs for host only

2019-05-28 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue May 28 14:18:59 2019
New Revision: 361880

URL: http://llvm.org/viewvc/llvm-project?rev=361880&view=rev
Log:
[CUDA][HIP] Emit dependent libs for host only

Recently D60274 was introduced to allow lld to handle dependent libs. However,
the current usage of dependent libs (e.g. pragma comment(lib, *) in Windows
header files) is intended for host only. Emitting the metadata in device IR
causes link errors in the device path.

Until there is a way to differentiate dependent libs for device or host,
metadata for dependent libs should be emitted for host only. This patch
enforces that.

Differential Revision: https://reviews.llvm.org/D62483

Added:
cfe/trunk/test/CodeGenCUDA/dependent-libs.cu
Modified:
cfe/trunk/lib/CodeGen/CodeGenModule.cpp

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=361880&r1=361879&r2=361880&view=diff
==
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Tue May 28 14:18:59 2019
@@ -457,7 +457,12 @@ void CodeGenModule::Release() {
   // that ELF linkers tend to handle libraries in a more complicated fashion
   // than on other platforms. This forces us to defer handling the dependent
   // libs to the linker.
-  if (!ELFDependentLibraries.empty()) {
+  //
+  // CUDA/HIP device and host libraries are different. Currently there is no
+  // way to differentiate dependent libraries for host or device. Existing
+  // usage of #pragma comment(lib, *) is intended for host libraries on
+  // Windows. Therefore emit llvm.dependent-libraries only for host.
+  if (!ELFDependentLibraries.empty() && !Context.getLangOpts().CUDAIsDevice) {
 auto *NMD = 
getModule().getOrInsertNamedMetadata("llvm.dependent-libraries");
 for (auto *MD : ELFDependentLibraries)
   NMD->addOperand(MD);

Added: cfe/trunk/test/CodeGenCUDA/dependent-libs.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/dependent-libs.cu?rev=361880&view=auto
==
--- cfe/trunk/test/CodeGenCUDA/dependent-libs.cu (added)
+++ cfe/trunk/test/CodeGenCUDA/dependent-libs.cu Tue May 28 14:18:59 2019
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -emit-llvm -o - -fcuda-is-device -x hip %s | FileCheck 
--check-prefix=DEV %s
+// RUN: %clang_cc1 -emit-llvm -o - -x hip %s | FileCheck --check-prefix=HOST %s
+
+// DEV-NOT: llvm.dependent-libraries
+// HOST: llvm.dependent-libraries
+#pragma comment(lib, "libabc")


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r361905 - Fix failure of lit test dependent-libs.cu

2019-05-28 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue May 28 18:34:44 2019
New Revision: 361905

URL: http://llvm.org/viewvc/llvm-project?rev=361905&view=rev
Log:
Fix failure of lit test dependent-libs.cu

Modified:
cfe/trunk/test/CodeGenCUDA/dependent-libs.cu

Modified: cfe/trunk/test/CodeGenCUDA/dependent-libs.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/dependent-libs.cu?rev=361905&r1=361904&r2=361905&view=diff
==
--- cfe/trunk/test/CodeGenCUDA/dependent-libs.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/dependent-libs.cu Tue May 28 18:34:44 2019
@@ -1,5 +1,7 @@
-// RUN: %clang_cc1 -emit-llvm -o - -fcuda-is-device -x hip %s | FileCheck 
--check-prefix=DEV %s
-// RUN: %clang_cc1 -emit-llvm -o - -x hip %s | FileCheck --check-prefix=HOST %s
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - -fcuda-is-device 
-x hip %s | FileCheck --check-prefix=DEV %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - -x hip %s | 
FileCheck --check-prefix=HOST %s
 
 // DEV-NOT: llvm.dependent-libraries
 // HOST: llvm.dependent-libraries


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r350885 - [HIP] Use nul instead of /dev/null when running on windows

2019-01-10 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu Jan 10 12:09:52 2019
New Revision: 350885

URL: http://llvm.org/viewvc/llvm-project?rev=350885&view=rev
Log:
[HIP] Use nul instead of /dev/null when running on windows

When clang is running on Windows, /dev/null is not available. Use nul as the
empty input file instead.

Differential Revision: https://reviews.llvm.org/D56225

Modified:
cfe/trunk/lib/Driver/ToolChains/HIP.cpp

Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/HIP.cpp?rev=350885&r1=350884&r2=350885&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/HIP.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/HIP.cpp Thu Jan 10 12:09:52 2019
@@ -24,6 +24,12 @@ using namespace clang::driver::tools;
 using namespace clang;
 using namespace llvm::opt;
 
+#if _WIN32 || _WIN64
+#define NULL_FILE "nul"
+#else
+#define NULL_FILE "/dev/null"
+#endif
+
 namespace {
 
 static void addBCLib(Compilation &C, const ArgList &Args,
@@ -197,7 +203,7 @@ void AMDGCN::constructHIPFatbinCommand(C
   // ToDo: Remove the dummy host binary entry which is required by
   // clang-offload-bundler.
   std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux";
-  std::string BundlerInputArg = "-inputs=/dev/null";
+  std::string BundlerInputArg = "-inputs=" NULL_FILE;
 
   for (const auto &II : Inputs) {
 const auto* A = II.getAction();


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r372394 - [CUDA][HIP] Fix hostness of defaulted constructor

2019-09-20 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri Sep 20 07:28:09 2019
New Revision: 372394

URL: http://llvm.org/viewvc/llvm-project?rev=372394&view=rev
Log:
[CUDA][HIP] Fix hostness of defaulted constructor
Clang does not respect the explicit device or host attributes of defaulted
special members.
Also clang does not respect the hostness of special members determined by their
first declarations.
Clang also adds duplicate implicit device or host attributes in certain cases.
This patch fixes that.
Differential Revision: https://reviews.llvm.org/D67509

Added:
cfe/trunk/test/SemaCUDA/default-ctor.cu
Modified:
cfe/trunk/lib/Sema/SemaCUDA.cpp

Modified: cfe/trunk/lib/Sema/SemaCUDA.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaCUDA.cpp?rev=372394&r1=372393&r2=372394&view=diff
==
--- cfe/trunk/lib/Sema/SemaCUDA.cpp (original)
+++ cfe/trunk/lib/Sema/SemaCUDA.cpp Fri Sep 20 07:28:09 2019
@@ -267,6 +267,18 @@ bool Sema::inferCUDATargetForImplicitSpe
CXXMethodDecl *MemberDecl,
bool ConstRHS,
bool Diagnose) {
+  // If the defaulted special member is defined lexically outside of its
+  // owning class, or the special member already has explicit device or host
+  // attributes, do not infer.
+  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
+  bool HasH = MemberDecl->hasAttr();
+  bool HasD = MemberDecl->hasAttr();
+  bool HasExplicitAttr =
+  (HasD && !MemberDecl->getAttr()->isImplicit()) ||
+  (HasH && !MemberDecl->getAttr()->isImplicit());
+  if (!InClass || HasExplicitAttr)
+return false;
+
   llvm::Optional InferredTarget;
 
   // We're going to invoke special member lookup; mark that these special
@@ -371,21 +383,24 @@ bool Sema::inferCUDATargetForImplicitSpe
 }
   }
 
+
+  // If no target was inferred, mark this member as __host__ __device__;
+  // it's the least restrictive option that can be invoked from any target.
+  bool NeedsH = true, NeedsD = true;
   if (InferredTarget.hasValue()) {
-if (InferredTarget.getValue() == CFT_Device) {
-  MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
-} else if (InferredTarget.getValue() == CFT_Host) {
-  MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
-} else {
-  MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
-  MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
-}
-  } else {
-// If no target was inferred, mark this member as __host__ __device__;
-// it's the least restrictive option that can be invoked from any target.
+if (InferredTarget.getValue() == CFT_Device)
+  NeedsH = false;
+else if (InferredTarget.getValue() == CFT_Host)
+  NeedsD = false;
+  }
+
+  // We either setting attributes first time, or the inferred ones must match
+  // previously set ones.
+  assert(!(HasD || HasH) || (NeedsD == HasD && NeedsH == HasH));
+  if (NeedsD && !HasD)
 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+  if (NeedsH && !HasH)
 MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
-  }
 
   return false;
 }

Added: cfe/trunk/test/SemaCUDA/default-ctor.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/default-ctor.cu?rev=372394&view=auto
==
--- cfe/trunk/test/SemaCUDA/default-ctor.cu (added)
+++ cfe/trunk/test/SemaCUDA/default-ctor.cu Fri Sep 20 07:28:09 2019
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -std=c++11 -triple nvptx64-nvidia-cuda -fsyntax-only \
+// RUN:-fcuda-is-device -verify -verify-ignore-unexpected=note %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fsyntax-only \
+// RUN:-verify -verify-ignore-unexpected=note %s
+
+#include "Inputs/cuda.h"
+
+struct In { In() = default; };
+struct InD { __device__ InD() = default; };
+struct InH { __host__ InH() = default; };
+struct InHD { __host__ __device__ InHD() = default; };
+
+struct Out { Out(); };
+struct OutD { __device__ OutD(); };
+struct OutH { __host__ OutH(); };
+struct OutHD { __host__ __device__ OutHD(); };
+
+Out::Out() = default;
+__device__ OutD::OutD() = default;
+__host__ OutH::OutH() = default;
+__host__ __device__ OutHD::OutHD() = default;
+
+__device__ void fd() {
+  In in;
+  InD ind;
+  InH inh; // expected-error{{no matching constructor for initialization of 
'InH'}}
+  InHD inhd;
+  Out out; // expected-error{{no matching constructor for initialization of 
'Out'}}
+  OutD outd;
+  OutH outh; // expected-error{{no matching constructor for initialization of 
'OutH'}}
+  OutHD outhd;
+}
+
+__host__ void fh() {
+  In in;
+  InD ind; // expected-error{{no matching constructor for initialization of 
'InD'}}
+  InH inh;
+  InHD inhd;
+  Out out;
+  OutD outd; // expected

r372452 - Revert assertion added by r372394

2019-09-20 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri Sep 20 19:51:44 2019
New Revision: 372452

URL: http://llvm.org/viewvc/llvm-project?rev=372452&view=rev
Log:
Revert assertion added by r372394

The assertion added by r372394 causes CUDA test in test-suite to assert.

The assertion was not there originally, so revert it.


Modified:
cfe/trunk/lib/Sema/SemaCUDA.cpp

Modified: cfe/trunk/lib/Sema/SemaCUDA.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaCUDA.cpp?rev=372452&r1=372451&r2=372452&view=diff
==
--- cfe/trunk/lib/Sema/SemaCUDA.cpp (original)
+++ cfe/trunk/lib/Sema/SemaCUDA.cpp Fri Sep 20 19:51:44 2019
@@ -396,7 +396,6 @@ bool Sema::inferCUDATargetForImplicitSpe
 
   // We either setting attributes first time, or the inferred ones must match
   // previously set ones.
-  assert(!(HasD || HasH) || (NeedsD == HasD && NeedsH == HasH));
   if (NeedsD && !HasD)
 MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
   if (NeedsH && !HasH)


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r372773 - [HIP] Support new kernel launching API

2019-09-24 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Tue Sep 24 12:16:40 2019
New Revision: 372773

URL: http://llvm.org/viewvc/llvm-project?rev=372773&view=rev
Log:
[HIP] Support new kernel launching API

Differential Revision: https://reviews.llvm.org/D67947

Modified:
cfe/trunk/include/clang/Basic/LangOptions.def
cfe/trunk/include/clang/Driver/Options.td
cfe/trunk/lib/CodeGen/CGCUDANV.cpp
cfe/trunk/lib/Driver/ToolChains/Clang.cpp
cfe/trunk/lib/Frontend/CompilerInvocation.cpp
cfe/trunk/lib/Sema/SemaCUDA.cpp
cfe/trunk/test/CodeGenCUDA/Inputs/cuda.h
cfe/trunk/test/CodeGenCUDA/kernel-call.cu

Modified: cfe/trunk/include/clang/Basic/LangOptions.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/LangOptions.def?rev=372773&r1=372772&r2=372773&view=diff
==
--- cfe/trunk/include/clang/Basic/LangOptions.def (original)
+++ cfe/trunk/include/clang/Basic/LangOptions.def Tue Sep 24 12:16:40 2019
@@ -226,6 +226,8 @@ LANGOPT(GPURelocatableDeviceCode, 1, 0,
 
 LANGOPT(SYCLIsDevice  , 1, 0, "Generate code for SYCL device")
 
+LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP")
+
 LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
 LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")
 LANGOPT(AlignedAllocationUnavailable, 1, 0, "aligned allocation functions are 
unavailable")

Modified: cfe/trunk/include/clang/Driver/Options.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=372773&r1=372772&r2=372773&view=diff
==
--- cfe/trunk/include/clang/Driver/Options.td (original)
+++ cfe/trunk/include/clang/Driver/Options.td Tue Sep 24 12:16:40 2019
@@ -599,6 +599,9 @@ def hip_device_lib_EQ : Joined<["--"], "
   HelpText<"HIP device library">;
 def fhip_dump_offload_linker_script : Flag<["-"], 
"fhip-dump-offload-linker-script">,
   Group, Flags<[NoArgumentUnused, HelpHidden]>;
+def fhip_new_launch_api : Flag<["-"], "fhip-new-launch-api">,
+  Flags<[CC1Option]>, HelpText<"Use new kernel launching API for HIP.">;
+def fno_hip_new_launch_api : Flag<["-"], "fno-hip-new-launch-api">;
 def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, 
Group,
   HelpText<"Path to libomptarget-nvptx libraries">;
 def dD : Flag<["-"], "dD">, Group, Flags<[CC1Option]>,

Modified: cfe/trunk/lib/CodeGen/CGCUDANV.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCUDANV.cpp?rev=372773&r1=372772&r2=372773&view=diff
==
--- cfe/trunk/lib/CodeGen/CGCUDANV.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCUDANV.cpp Tue Sep 24 12:16:40 2019
@@ -236,7 +236,8 @@ void CGNVCUDARuntime::emitDeviceStub(Cod
 
   EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
   if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
- CudaFeature::CUDA_USES_NEW_LAUNCH))
+ CudaFeature::CUDA_USES_NEW_LAUNCH) ||
+  CGF.getLangOpts().HIPUseNewLaunchAPI)
 emitDeviceStubBodyNew(CGF, Args);
   else
 emitDeviceStubBodyLegacy(CGF, Args);
@@ -264,14 +265,18 @@ void CGNVCUDARuntime::emitDeviceStubBody
 
   llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
 
-  // Lookup cudaLaunchKernel function.
+  // Lookup cudaLaunchKernel/hipLaunchKernel function.
   // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 
blockDim,
   //  void **args, size_t sharedMem,
   //  cudaStream_t stream);
+  // hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
+  //void **args, size_t sharedMem,
+  //hipStream_t stream);
   TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
   DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
+  auto LaunchKernelName = addPrefixToName("LaunchKernel");
   IdentifierInfo &cudaLaunchKernelII =
-  CGM.getContext().Idents.get("cudaLaunchKernel");
+  CGM.getContext().Idents.get(LaunchKernelName);
   FunctionDecl *cudaLaunchKernelFD = nullptr;
   for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) {
 if (FunctionDecl *FD = dyn_cast(Result))
@@ -280,7 +285,7 @@ void CGNVCUDARuntime::emitDeviceStubBody
 
   if (cudaLaunchKernelFD == nullptr) {
 CGM.Error(CGF.CurFuncDecl->getLocation(),
-  "Can't find declaration for cudaLaunchKernel()");
+  "Can't find declaration for " + LaunchKernelName);
 return;
   }
   // Create temporary dim3 grid_dim, block_dim.
@@ -301,7 +306,7 @@ void CGNVCUDARuntime::emitDeviceStubBody
/*ShmemSize=*/ShmemSize.getType(),
/*Stream=*/Stream.getType()},
   /*isVarArg=*/false),
-  "__cudaPopCallConfigura

r373561 - [HIP] Support -emit-llvm for device compilation

2019-10-02 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Oct  2 20:27:43 2019
New Revision: 373561

URL: http://llvm.org/viewvc/llvm-project?rev=373561&view=rev
Log:
[HIP] Support -emit-llvm for device compilation

Sometimes it is useful to compile HIP device code to LLVM BC. It is not 
convenient to use clang -cc1 since
there are lots of options needed.

This patch allows clang driver to compile HIP device code to LLVM BC with 
-emit-llvm -c.

Differential Revision: https://reviews.llvm.org/D68284

Added:
cfe/trunk/test/Driver/hip-device-compile.hip
Modified:
cfe/trunk/lib/Driver/Driver.cpp

Modified: cfe/trunk/lib/Driver/Driver.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=373561&r1=373560&r2=373561&view=diff
==
--- cfe/trunk/lib/Driver/Driver.cpp (original)
+++ cfe/trunk/lib/Driver/Driver.cpp Wed Oct  2 20:27:43 2019
@@ -2312,6 +2312,8 @@ class OffloadingActionBuilder final {
 /// compilation.
 bool CompileHostOnly = false;
 bool CompileDeviceOnly = false;
+bool EmitLLVM = false;
+bool EmitAsm = false;
 
 /// List of GPU architectures to use in this compilation.
 SmallVector GpuArchList;
@@ -2478,6 +2480,8 @@ class OffloadingActionBuilder final {
   CompileDeviceOnly = PartialCompilationArg &&
   PartialCompilationArg->getOption().matches(
   options::OPT_cuda_device_only);
+  EmitLLVM = Args.getLastArg(options::OPT_emit_llvm);
+  EmitAsm = Args.getLastArg(options::OPT_S);
 
   // Collect all cuda_gpu_arch parameters, removing duplicates.
   std::set GpuArchs;
@@ -2664,7 +2668,8 @@ class OffloadingActionBuilder final {
   assert(!CompileHostOnly &&
  "Not expecting CUDA actions in host-only compilation.");
 
-  if (!Relocatable && CurPhase == phases::Backend) {
+  if (!Relocatable && CurPhase == phases::Backend && !EmitLLVM &&
+  !EmitAsm) {
 // If we are in backend phase, we attempt to generate the fat binary.
 // We compile each arch to IR and use a link action to generate code
 // object containing ISA. Then we use a special "link" action to create
@@ -2732,7 +2737,8 @@ class OffloadingActionBuilder final {
 A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A,
AssociatedOffloadKind);
 
-  return ABRT_Success;
+  return (CompileDeviceOnly && CurPhase == FinalPhase) ? ABRT_Ignore_Host
+   : ABRT_Success;
 }
 
 void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {

Added: cfe/trunk/test/Driver/hip-device-compile.hip
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-device-compile.hip?rev=373561&view=auto
==
--- cfe/trunk/test/Driver/hip-device-compile.hip (added)
+++ cfe/trunk/test/Driver/hip-device-compile.hip Wed Oct  2 20:27:43 2019
@@ -0,0 +1,72 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// If -emit-llvm and/or -S is used in device only compilation,
+// the output should not be bundled.
+
+// RUN: %clang -c -emit-llvm --cuda-device-only -### -target x86_64-linux-gnu \
+// RUN:   -o a.bc -x hip --cuda-gpu-arch=gfx900 \
+// RUN:   --hip-device-lib=lib1.bc \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,BC %s
+
+// RUN: %clang -c -S -emit-llvm --cuda-device-only -### -target 
x86_64-linux-gnu \
+// RUN:   -o a.ll -x hip --cuda-gpu-arch=gfx900 \
+// RUN:   --hip-device-lib=lib1.bc \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,LL %s
+
+// RUN: %clang -c -S --cuda-device-only -### -target x86_64-linux-gnu \
+// RUN:   -o a.s -x hip --cuda-gpu-arch=gfx900 \
+// RUN:   --hip-device-lib=lib1.bc \
+// RUN:   --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,ASM %s
+
+// CHECK: {{".*clang.*"}} "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// BC-SAME: "-emit-llvm-bc"
+// LL-SAME: "-emit-llvm"
+// ASM-NOT: "-emit-llvm"
+// CHECK-SAME: "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx900"
+// CHECK-SAME: "-fcuda-is-device"
+// CHECK-SAME: {{".*lib1.bc"}}
+// BC-SAME: "-o" "a.bc"
+// LL-SAME: "-o" "a.ll"
+// ASM-SAME: "-o" "a.s"
+// CHECK-SAME: {{".*a.cu"}}
+
+// CHECK-NOT: {{"*.llvm-link"}}
+// CHECK-NOT: {{".*opt"}}
+// CHECK-NOT: {{".*llc"}}
+// CHECK-NOT: {{".*lld"}}
+// CHECK-NOT: {{".*clang-offload-bundler"}}
+// CHECK-NOT: {{".*ld.*"}}
+
+// If neither -emit-llvm nor

r373649 - [HIP] Use option -nogpulib to disable linking device lib

2019-10-03 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Thu Oct  3 11:59:56 2019
New Revision: 373649

URL: http://llvm.org/viewvc/llvm-project?rev=373649&view=rev
Log:
[HIP] Use option -nogpulib to disable linking device lib

Differential Revision: https://reviews.llvm.org/D68300

Added:
cfe/trunk/test/Driver/hip-no-device-libs.hip
Modified:
cfe/trunk/include/clang/Driver/Options.td
cfe/trunk/lib/Driver/ToolChains/Cuda.cpp
cfe/trunk/lib/Driver/ToolChains/HIP.cpp

Modified: cfe/trunk/include/clang/Driver/Options.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=373649&r1=373648&r2=373649&view=diff
==
--- cfe/trunk/include/clang/Driver/Options.td (original)
+++ cfe/trunk/include/clang/Driver/Options.td Thu Oct  3 11:59:56 2019
@@ -2556,7 +2556,9 @@ def no__dead__strip__inits__and__terms :
 def nobuiltininc : Flag<["-"], "nobuiltininc">, Flags<[CC1Option, CoreOption]>,
   HelpText<"Disable builtin #include directories">;
 def nocudainc : Flag<["-"], "nocudainc">;
-def nocudalib : Flag<["-"], "nocudalib">;
+def nogpulib : Flag<["-"], "nogpulib">,
+  HelpText<"Do not link device library for CUDA/HIP device compilation">;
+def : Flag<["-"], "nocudalib">, Alias;
 def nodefaultlibs : Flag<["-"], "nodefaultlibs">;
 def nofixprebinding : Flag<["-"], "nofixprebinding">;
 def nolibc : Flag<["-"], "nolibc">;

Modified: cfe/trunk/lib/Driver/ToolChains/Cuda.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Cuda.cpp?rev=373649&r1=373648&r2=373649&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/Cuda.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/Cuda.cpp Thu Oct  3 11:59:56 2019
@@ -121,7 +121,7 @@ CudaInstallationDetector::CudaInstallati
   Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
   }
 
-  bool NoCudaLib = Args.hasArg(options::OPT_nocudalib);
+  bool NoCudaLib = Args.hasArg(options::OPT_nogpulib);
 
   for (const auto &Candidate : Candidates) {
 InstallPath = Candidate.Path;
@@ -628,7 +628,7 @@ void CudaToolChain::addClangTargetOption
   CC1Args.push_back("-fgpu-rdc");
   }
 
-  if (DriverArgs.hasArg(options::OPT_nocudalib))
+  if (DriverArgs.hasArg(options::OPT_nogpulib))
 return;
 
   std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);

Modified: cfe/trunk/lib/Driver/ToolChains/HIP.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/HIP.cpp?rev=373649&r1=373648&r2=373649&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/HIP.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/HIP.cpp Thu Oct  3 11:59:56 2019
@@ -286,6 +286,9 @@ void HIPToolChain::addClangTargetOptions
 CC1Args.append({"-fvisibility", "hidden"});
 CC1Args.push_back("-fapply-global-visibility-to-externs");
   }
+
+  if (DriverArgs.hasArg(options::OPT_nogpulib))
+return;
   ArgStringList LibraryPaths;
 
   // Find in --hip-device-lib-path and HIP_LIBRARY_PATH.

Added: cfe/trunk/test/Driver/hip-no-device-libs.hip
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/hip-no-device-libs.hip?rev=373649&view=auto
==
--- cfe/trunk/test/Driver/hip-no-device-libs.hip (added)
+++ cfe/trunk/test/Driver/hip-no-device-libs.hip Thu Oct  3 11:59:56 2019
@@ -0,0 +1,11 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -nogpulib -target x86_64-linux-gnu \
+// RUN:   -x hip --cuda-gpu-arch=gfx900 \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck %s
+
+// CHECK-NOT: "-mlink-builtin-bitcode"
+


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r330279 - [HIP] Add driver input type for HIP

2018-04-18 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Wed Apr 18 11:25:03 2018
New Revision: 330279

URL: http://llvm.org/viewvc/llvm-project?rev=330279&view=rev
Log:
[HIP] Add driver input type for HIP

Patch by Greg Rodgers.
Revised by Yaxun Liu.

Differential Revision: https://reviews.llvm.org/D45489

Modified:
cfe/trunk/include/clang/Driver/Types.def
cfe/trunk/lib/Driver/Driver.cpp
cfe/trunk/lib/Driver/Types.cpp

Modified: cfe/trunk/include/clang/Driver/Types.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Types.def?rev=330279&r1=330278&r2=330279&view=diff
==
--- cfe/trunk/include/clang/Driver/Types.def (original)
+++ cfe/trunk/include/clang/Driver/Types.def Wed Apr 18 11:25:03 2018
@@ -46,6 +46,9 @@ TYPE("cl",   CL,
 TYPE("cuda-cpp-output",  PP_CUDA,  INVALID, "cui",   "u")
 TYPE("cuda", CUDA, PP_CUDA, "cu","u")
 TYPE("cuda", CUDA_DEVICE,  PP_CUDA, "cu","")
+TYPE("hip-cpp-output",   PP_HIP,   INVALID, "cui",   "u")
+TYPE("hip",  HIP,  PP_HIP,  "cu","u")
+TYPE("hip",  HIP_DEVICE,   PP_HIP,  "cu","")
 TYPE("objective-c-cpp-output",   PP_ObjC,  INVALID, "mi","u")
 TYPE("objc-cpp-output",  PP_ObjC_Alias, INVALID,"mi","u")
 TYPE("objective-c",  ObjC, PP_ObjC, "m", "u")

Modified: cfe/trunk/lib/Driver/Driver.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Driver.cpp?rev=330279&r1=330278&r2=330279&view=diff
==
--- cfe/trunk/lib/Driver/Driver.cpp (original)
+++ cfe/trunk/lib/Driver/Driver.cpp Wed Apr 18 11:25:03 2018
@@ -2257,9 +2257,10 @@ class OffloadingActionBuilder final {
 assert(!GpuArchList.empty() &&
"We should have at least one GPU architecture.");
 
-// If the host input is not CUDA, we don't need to bother about this
-// input.
-if (IA->getType() != types::TY_CUDA) {
+// If the host input is not CUDA or HIP, we don't need to bother about
+// this input.
+if (IA->getType() != types::TY_CUDA &&
+IA->getType() != types::TY_HIP) {
   // The builder will ignore this input.
   IsActive = false;
   return ABRT_Inactive;
@@ -2272,9 +2273,12 @@ class OffloadingActionBuilder final {
   return ABRT_Success;
 
 // Replicate inputs for each GPU architecture.
-for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
-  CudaDeviceActions.push_back(C.MakeAction(
-  IA->getInputArg(), types::TY_CUDA_DEVICE));
+auto Ty = IA->getType() == types::TY_HIP ? types::TY_HIP_DEVICE
+ : types::TY_CUDA_DEVICE;
+for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
+  CudaDeviceActions.push_back(
+  C.MakeAction(IA->getInputArg(), Ty));
+}
 
 return ABRT_Success;
   }

Modified: cfe/trunk/lib/Driver/Types.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/Types.cpp?rev=330279&r1=330278&r2=330279&view=diff
==
--- cfe/trunk/lib/Driver/Types.cpp (original)
+++ cfe/trunk/lib/Driver/Types.cpp Wed Apr 18 11:25:03 2018
@@ -102,6 +102,9 @@ bool types::isAcceptedByClang(ID Id) {
   case TY_CL:
   case TY_CUDA: case TY_PP_CUDA:
   case TY_CUDA_DEVICE:
+  case TY_HIP:
+  case TY_PP_HIP:
+  case TY_HIP_DEVICE:
   case TY_ObjC: case TY_PP_ObjC: case TY_PP_ObjC_Alias:
   case TY_CXX: case TY_PP_CXX:
   case TY_ObjCXX: case TY_PP_ObjCXX: case TY_PP_ObjCXX_Alias:
@@ -141,6 +144,9 @@ bool types::isCXX(ID Id) {
   case TY_ObjCXXHeader: case TY_PP_ObjCXXHeader:
   case TY_CXXModule: case TY_PP_CXXModule:
   case TY_CUDA: case TY_PP_CUDA: case TY_CUDA_DEVICE:
+  case TY_HIP:
+  case TY_PP_HIP:
+  case TY_HIP_DEVICE:
 return true;
   }
 }
@@ -166,6 +172,9 @@ bool types::isCuda(ID Id) {
   case TY_CUDA:
   case TY_PP_CUDA:
   case TY_CUDA_DEVICE:
+  case TY_HIP:
+  case TY_PP_HIP:
+  case TY_HIP_DEVICE:
 return true;
   }
 }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

r330447 - [CUDA] Set LLVM calling convention for CUDA kernel

2018-04-20 Thread Yaxun Liu via cfe-commits

Author: yaxunl
Date: Fri Apr 20 10:01:03 2018
New Revision: 330447

URL: http://llvm.org/viewvc/llvm-project?rev=330447&view=rev
Log:
[CUDA] Set LLVM calling convention for CUDA kernel

Some targets need special LLVM calling convention for CUDA kernel.
This patch does that through a TargetCodeGenInfo hook.

It only affects amdgcn target.

Patch by Greg Rodgers.
Revised and lit tests added by Yaxun Liu.

Differential Revision: https://reviews.llvm.org/D45223

Added:
cfe/trunk/test/CodeGenCUDA/kernel-amdgcn.cu
Modified:
cfe/trunk/lib/CodeGen/CodeGenModule.cpp
cfe/trunk/lib/CodeGen/TargetInfo.cpp
cfe/trunk/lib/CodeGen/TargetInfo.h

Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=330447&r1=330446&r2=330447&view=diff
==
--- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
+++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Fri Apr 20 10:01:03 2018
@@ -3627,6 +3627,9 @@ void CodeGenModule::EmitGlobalFunctionDe
 
   MaybeHandleStaticInExternC(D, Fn);
 
+  if (D->hasAttr())
+getTargetCodeGenInfo().setCUDAKernelCallingConvention(Fn);
+
   maybeSetTrivialComdat(*D, *Fn);
 
   CodeGenFunction(*this).GenerateCode(D, Fn, FI);

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=330447&r1=330446&r2=330447&view=diff
==
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Fri Apr 20 10:01:03 2018
@@ -7637,6 +7637,7 @@ public:
 llvm::Function *BlockInvokeFunc,
 llvm::Value *BlockLiteral) const override;
   bool shouldEmitStaticExternCAliases() const override;
+  void setCUDAKernelCallingConvention(llvm::Function *F) const override;
 };
 }
 
@@ -7772,6 +7773,11 @@ bool AMDGPUTargetCodeGenInfo::shouldEmit
   return false;
 }
 
+void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
+llvm::Function *F) const {
+  F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+}
+
 //===----------------------------------------------------------------------===//
 // SPARC v8 ABI Implementation.
 // Based on the SPARC Compliance Definition version 2.4.1.

Modified: cfe/trunk/lib/CodeGen/TargetInfo.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.h?rev=330447&r1=330446&r2=330447&view=diff
==
--- cfe/trunk/lib/CodeGen/TargetInfo.h (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.h Fri Apr 20 10:01:03 2018
@@ -301,6 +301,8 @@ public:
   /// mangled name of functions declared within an extern "C" region and marked
   /// as 'used', and having internal linkage.
   virtual bool shouldEmitStaticExternCAliases() const { return true; }
+
+  virtual void setCUDAKernelCallingConvention(llvm::Function *F) const {}
 };
 
 } // namespace CodeGen

Added: cfe/trunk/test/CodeGenCUDA/kernel-amdgcn.cu
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/kernel-amdgcn.cu?rev=330447&view=auto
==
--- cfe/trunk/test/CodeGenCUDA/kernel-amdgcn.cu (added)
+++ cfe/trunk/test/CodeGenCUDA/kernel-amdgcn.cu Fri Apr 20 10:01:03 2018
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
+#include "Inputs/cuda.h"
+
+// CHECK: define amdgpu_kernel void @_ZN1A6kernelEv
+class A {
+public:
+  static __global__ void kernel(){}
+};
+
+// CHECK: define void @_Z10non_kernelv
+__device__ void non_kernel(){}
+
+// CHECK: define amdgpu_kernel void @_Z6kerneli
+__global__ void kernel(int x) {
+  non_kernel();
+}
+
+// CHECK: define amdgpu_kernel void @_Z11EmptyKernelIvEvv
+template <typename T>
+__global__ void EmptyKernel(void) {}
+
+struct Dummy {
+  /// Type definition of the EmptyKernel kernel entry point
+  typedef void (*EmptyKernelPtr)();
+  EmptyKernelPtr Empty() { return EmptyKernel<void>; } 
+};
+
+// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_
+template<class T>
+__global__ void template_kernel(T x) {}
+
+void launch(void *f);
+
+int main() {
+  Dummy D;
+  launch((void*)A::kernel);
+  launch((void*)kernel);
+  launch((void*)template_kernel<A>);
+  launch((void*)D.Empty());
+  return 0;
+}


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 1625 matches

Mail list logo