yaxunl updated this revision to Diff 90243. yaxunl edited the summary of this revision.
https://reviews.llvm.org/D27627 Files: include/clang/AST/ASTContext.h include/clang/Basic/TargetInfo.h lib/AST/ASTContext.cpp lib/Basic/TargetInfo.cpp lib/Basic/Targets.cpp lib/CodeGen/CGBuiltin.cpp lib/CodeGen/CGCall.cpp lib/CodeGen/CGClass.cpp lib/CodeGen/CGDecl.cpp lib/CodeGen/CGDeclCXX.cpp lib/CodeGen/CGException.cpp lib/CodeGen/CGExpr.cpp lib/CodeGen/CGExprCXX.cpp lib/CodeGen/CGExprConstant.cpp lib/CodeGen/CGExprScalar.cpp lib/CodeGen/CGGPUBuiltin.cpp lib/CodeGen/CGOpenMPRuntime.cpp lib/CodeGen/CGVTT.cpp lib/CodeGen/CGVTables.cpp lib/CodeGen/CodeGenFunction.cpp lib/CodeGen/CodeGenFunction.h lib/CodeGen/CodeGenModule.cpp lib/CodeGen/CodeGenTypes.cpp lib/CodeGen/CodeGenTypes.h lib/CodeGen/ItaniumCXXABI.cpp test/CodeGenCUDA/address-spaces.cu test/CodeGenCUDA/convergent.cu test/CodeGenCUDA/device-var-init.cu test/CodeGenCUDA/device-vtable.cu test/CodeGenCUDA/filter-decl.cu test/CodeGenCUDA/function-overload.cu test/CodeGenCUDA/kernel-args-alignment.cu test/CodeGenCUDA/llvm-used.cu test/CodeGenCUDA/printf.cu test/CodeGenCXX/amdgcn-global-init.cpp test/OpenMP/nvptx_parallel_codegen.cpp
Index: test/OpenMP/nvptx_parallel_codegen.cpp =================================================================== --- test/OpenMP/nvptx_parallel_codegen.cpp +++ test/OpenMP/nvptx_parallel_codegen.cpp @@ -2,6 +2,7 @@ // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=amdgcn -emit-llvm-bc %s -o %t-x86-host.bc // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 // RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 // expected-no-diagnostics @@ -62,14 +63,14 @@ return a; } - // CHECK-NOT: define {{.*}}void {{@__omp_offloading_.+template.+l17}}_worker() + // CHECK-NOT: define {{.*}}void {{@__omp_offloading_.+template.+l18}}_worker() - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}_worker() + // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l27}}_worker() // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8, // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*, // CHECK: store i8* null, i8** [[OMP_WORK_FN]], @@ -122,7 +123,7 @@ // CHECK: [[EXIT]] // 
CHECK: ret void - // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+template.+l26]](i[[SZ:32|64]] + // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+template.+l27]](i[[SZ:32|64]] // Create local storage for each capture. // CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]], // CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] @@ -194,7 +195,7 @@ - // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l43}}_worker() + // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l44}}_worker() // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8, // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*, // CHECK: store i8* null, i8** [[OMP_WORK_FN]], @@ -238,7 +239,7 @@ // CHECK: [[EXIT]] // CHECK: ret void - // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+template.+l43]](i[[SZ:32|64]] + // CHECK: define {{.*}}void [[T6:@__omp_offloading_.+template.+l44]](i[[SZ:32|64]] // Create local storage for each capture. // CHECK: [[LOCAL_N:%.+]] = alloca i[[SZ]], // CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]], Index: test/CodeGenCXX/amdgcn-global-init.cpp =================================================================== --- /dev/null +++ test/CodeGenCXX/amdgcn-global-init.cpp @@ -0,0 +1,211 @@ +// RUN: %clang_cc1 -std=c++11 -triple=amdgcn-amd-amdhsa -emit-llvm -fexceptions %s -o - |FileCheck %s +// RUN: %clang_cc1 -std=c++11 -triple=amdgcn-amd-amdhsa -emit-llvm %s -o - |FileCheck -check-prefix CHECK-NOEXC %s +// RUN: %clang_cc1 -std=c++11 -triple=amdgcn-amd-amdhsa -emit-llvm \ +// RUN: -momit-leaf-frame-pointer -mdisable-fp-elim %s -o - \ +// RUN: | FileCheck -check-prefix CHECK-FP %s + +struct A { + A(); + ~A(); +}; + +struct B { B(); ~B(); }; + +struct C { void *field; }; + +struct D { ~D(); }; + +// CHECK: @__dso_handle = external hidden addrspace(1) global i8 +// CHECK: @c = addrspace(1) global %struct.C zeroinitializer, align 8 + +// PR6205: The casts should not require global initializers +// CHECK: @_ZN6PR59741cE = external addrspace(1) global 
%"struct.PR5974::C" +// CHECK: @_ZN6PR59741aE = addrspace(1) global %"struct.PR5974::A" addrspace(4)* addrspacecast (%"struct.PR5974::A" addrspace(1)* getelementptr inbounds (%"struct.PR5974::C", %"struct.PR5974::C" addrspace(1)* @_ZN6PR59741cE, i32 0, i32 0) to %"struct.PR5974::A" addrspace(4)*), align 8 +// CHECK: @_ZN6PR59741bE = addrspace(1) global %"struct.PR5974::B" addrspace(4)* bitcast (i8 addrspace(4)* getelementptr (i8, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast (%"struct.PR5974::C" addrspace(1)* @_ZN6PR59741cE to i8 addrspace(1)*) to i8 addrspace(4)*), i64 4) to %"struct.PR5974::B" addrspace(4)*), align 8 + +// CHECK: call void @_ZN1AC1Ev(%struct.A addrspace(4)* addrspacecast (%struct.A addrspace(1)* @a to %struct.A addrspace(4)*)) +// CHECK: call i32 @__cxa_atexit(void (i8 addrspace(4)*)* bitcast (void (%struct.A addrspace(4)*)* @_ZN1AD1Ev to void (i8 addrspace(4)*)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds (%struct.A, %struct.A addrspace(1)* @a, i32 0, i32 0) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* @__dso_handle to i8 addrspace(4)*)) +A a; + +// CHECK: call void @_ZN1BC1Ev(%struct.B addrspace(4)* addrspacecast (%struct.B addrspace(1)* @b to %struct.B addrspace(4)*)) +// CHECK: call i32 @__cxa_atexit(void (i8 addrspace(4)*)* bitcast (void (%struct.B addrspace(4)*)* @_ZN1BD1Ev to void (i8 addrspace(4)*)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds (%struct.B, %struct.B addrspace(1)* @b, i32 0, i32 0) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* @__dso_handle to i8 addrspace(4)*)) +B b; + +// PR6205: this should not require a global initializer +// CHECK-NOT: call void @_ZN1CC1Ev +C c; + +// CHECK: call i32 @__cxa_atexit(void (i8 addrspace(4)*)* bitcast (void (%struct.D addrspace(4)*)* @_ZN1DD1Ev to void (i8 addrspace(4)*)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds (%struct.D, %struct.D 
addrspace(1)* @d, i32 0, i32 0) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* @__dso_handle to i8 addrspace(4)*)) +D d; + +// <rdar://problem/7458115> +namespace test1 { + int f(); + const int x = f(); // This has side-effects and gets emitted immediately. + const int y = x - 1; // This gets deferred. + const int z = ~y; // This also gets deferred, but gets "undeferred" before y. + int test() { return z; } +// CHECK-LABEL: define i32 @_ZN5test14testEv() + + // All of these initializers end up delayed, so we check them later. +} + +// <rdar://problem/8246444> +namespace test2 { + struct allocator { allocator(); ~allocator(); }; + struct A { A(const allocator &a = allocator()); ~A(); }; + + A a; +// CHECK: call void @_ZN5test29allocatorC1Ev( +// CHECK: invoke void @_ZN5test21AC1ERKNS_9allocatorE( +// CHECK: call void @_ZN5test29allocatorD1Ev( +// CHECK: call i32 @__cxa_atexit({{.*}} @_ZN5test21AD1Ev {{.*}} @_ZN5test21aE +} + +namespace test3 { + // Tested at the beginning of the file. + const char * const var = "string"; + extern const char * const var; + + const char *test() { return var; } +} + +namespace test4 { + struct A { + A(); + }; + extern int foo(); + + // This needs an initialization function and guard variables. + // CHECK: load i8, i8 addrspace(1)* bitcast (i64 addrspace(1)* @_ZGVN5test41xE to i8 addrspace(1)*) + // CHECK: [[CALL:%.*]] = call i32 @_ZN5test43fooEv + // CHECK-NEXT: store i32 [[CALL]], i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @_ZN5test41xE to i32 addrspace(4)*) + // CHECK-NEXT: store i64 1, i64 addrspace(1)* @_ZGVN5test41xE + __attribute__((weak)) int x = foo(); +} + +namespace PR5974 { + struct A { int a; }; + struct B { int b; }; + struct C : A, B { int c; }; + + extern C c; + + // These should not require global initializers. + A* a = &c; + B* b = &c; +} + +// PR9570: the indirect field shouldn't crash IR gen. 
+namespace test5 { + static union { + unsigned bar[4096] __attribute__((aligned(128))); + }; +} + +namespace std { struct type_info; } + +namespace test6 { + struct A { virtual ~A(); }; + struct B : A {}; + extern A *p; + + // We must emit a dynamic initializer for 'q', because it could throw. + B *const q = &dynamic_cast<B&>(*p); + // CHECK: call void @__cxa_bad_cast() + // CHECK: store {{.*}} @_ZN5test6L1qE + + // We don't need to emit 'r' at all, because it has internal linkage, is + // unused, and its initialization has no side-effects. + B *const r = dynamic_cast<B*>(p); + // CHECK-NOT: call void @__cxa_bad_cast() + // CHECK-NOT: store {{.*}} @_ZN5test6L1rE + + // This can throw, so we need to emit it. + const std::type_info *const s = &typeid(*p); + // CHECK: store {{.*}} @_ZN5test6L1sE + + // This can't throw, so we don't. + const std::type_info *const t = &typeid(p); + // CHECK-NOT: @_ZN5test6L1tE + + extern B *volatile v; + // CHECK: store {{.*}} @_ZN5test6L1wE + B *const w = dynamic_cast<B*>(v); + + // CHECK: load volatile + // CHECK: store {{.*}} @_ZN5test6L1xE + const int x = *(volatile int*)0x1234; + + namespace { + int a = int(); + volatile int b = int(); + int c = a; + int d = b; + // CHECK-NOT: store {{.*}} @_ZN5test6{{[A-Za-z0-9_]*}}1aE + // CHECK-NOT: store {{.*}} @_ZN5test6{{[A-Za-z0-9_]*}}1bE + // CHECK-NOT: store {{.*}} @_ZN5test6{{[A-Za-z0-9_]*}}1cE + // CHECK: load volatile {{.*}} @_ZN5test6{{[A-Za-z0-9_]*}}1bE + // CHECK: store {{.*}} @_ZN5test6{{[A-Za-z0-9_]*}}1dE + } +} + +namespace test7 { + struct A { A(); }; + struct B { ~B(); int n; }; + struct C { C() = default; C(const C&); int n; }; + struct D {}; + + // CHECK: call void @_ZN5test71AC1Ev({{.*}}@_ZN5test7L1aE + const A a = A(); + + // CHECK: call i32 @__cxa_atexit({{.*}} @_ZN5test71BD1Ev{{.*}} @_ZN5test7L2b1E + // CHECK: call i32 @__cxa_atexit({{.*}} @_ZN5test71BD1Ev{{.*}} @_ZGRN5test72b2E + // CHECK: call void @_ZN5test71BD1Ev( + // CHECK: store {{.*}} @_ZN5test7L2b3E + const B b1 = 
B(); + const B &b2 = B(); + const int b3 = B().n; + + // CHECK-NOT: @_ZN5test7L2c1E + // CHECK: call void @llvm.memset{{.*}} @_ZN5test7L2c1E + // CHECK-NOT: @_ZN5test7L2c1E + // CHECK: @_ZN5test7L2c2E + // CHECK-NOT: @_ZN5test7L2c3E + // CHECK: @_ZN5test7L2c4E + const C c1 = C(); + const C c2 = static_cast<const C&>(C()); + const int c3 = C().n; + const int c4 = C(C()).n; + + // CHECK-NOT: @_ZN5test7L1dE + const D d = D(); + + // CHECK: store {{.*}} @_ZN5test71eE + int f(), e = f(); +} + + +// At the end of the file, we check that y is initialized before z. + +// CHECK: define internal void [[TEST1_Z_INIT:@.*]]() +// CHECK: load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(2)* @_ZN5test1L1yE to i32 addrspace(4)*) +// CHECK-NEXT: xor +// CHECK-NEXT: store i32 {{.*}}, i32 addrspace(4)* addrspacecast (i32 addrspace(2)* @_ZN5test1L1zE to i32 addrspace(4)*) +// CHECK: define internal void [[TEST1_Y_INIT:@.*]]() +// CHECK: load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(2)* @_ZN5test1L1xE to i32 addrspace(4)*) +// CHECK-NEXT: sub +// CHECK-NEXT: store i32 {{.*}}, i32 addrspace(4)* addrspacecast (i32 addrspace(2)* @_ZN5test1L1yE to i32 addrspace(4)*) + +// CHECK: define internal void @_GLOBAL__sub_I_amdgcn_global_init.cpp() #{{[0-9]+}} +// CHECK: call void [[TEST1_Y_INIT]] +// CHECK: call void [[TEST1_Z_INIT]] + +// rdar://problem/8090834: this should be nounwind +// CHECK-NOEXC: define internal void @_GLOBAL__sub_I_amdgcn_global_init.cpp() [[NUW:#[0-9]+]] + +// CHECK-NOEXC: attributes [[NUW]] = { noinline nounwind{{.*}} } + +// PR21811: attach the appropriate attribute to the global init function +// CHECK-FP: define internal void @_GLOBAL__sub_I_amdgcn_global_init.cpp() [[NUX:#[0-9]+]] +// CHECK-FP: attributes [[NUX]] = { noinline nounwind {{.*}}"no-frame-pointer-elim-non-leaf"{{.*}} } Index: test/CodeGenCUDA/printf.cu =================================================================== --- test/CodeGenCUDA/printf.cu +++ test/CodeGenCUDA/printf.cu @@ 
-2,38 +2,46 @@ // REQUIRES: nvptx-registered-target // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm \ -// RUN: -o - %s | FileCheck %s +// RUN: -o - %s | FileCheck -check-prefixes=CHECK,NVPTX %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm \ +// RUN: -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s #include "Inputs/cuda.h" extern "C" __device__ int vprintf(const char*, const char*); // Check a simple call to printf end-to-end. // CHECK: [[SIMPLE_PRINTF_TY:%[a-zA-Z0-9_]+]] = type { i32, i64, double } +// CHECK-LABEL: define i32 @_Z11CheckSimplev() __device__ int CheckSimple() { - // CHECK: [[BUF:%[a-zA-Z0-9_]+]] = alloca [[SIMPLE_PRINTF_TY]] + // NVPTX: [[BUF:%[a-zA-Z0-9_]+]] = alloca [[SIMPLE_PRINTF_TY]] + // AMDGCN: [[ALLOCA:%[a-zA-Z0-9_]+]] = alloca [[SIMPLE_PRINTF_TY]] + // AMDGCN: [[BUF:%[a-zA-Z0-9_]+]] = addrspacecast %printf_args* [[ALLOCA]] to %printf_args addrspace(4)* // CHECK: [[FMT:%[0-9]+]] = load{{.*}}%fmt const char* fmt = "%d %lld %f"; - // CHECK: [[PTR0:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 0 - // CHECK: store i32 1, i32* [[PTR0]], align 4 - // CHECK: [[PTR1:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 1 - // CHECK: store i64 2, i64* [[PTR1]], align 8 - // CHECK: [[PTR2:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 2 - // CHECK: store double 3.0{{[^,]*}}, double* [[PTR2]], align 8 - // CHECK: [[BUF_CAST:%[0-9]+]] = bitcast [[SIMPLE_PRINTF_TY]]* [[BUF]] to i8* - // CHECK: [[RET:%[0-9]+]] = call i32 @vprintf(i8* [[FMT]], i8* [[BUF_CAST]]) + // CHECK: [[PTR0:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]{{.*}}* [[BUF]], i32 0, i32 0 + // CHECK: store i32 1, i32{{.*}}* [[PTR0]], align 4 + // CHECK: [[PTR1:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]{{.*}}* [[BUF]], 
i32 0, i32 1 + // CHECK: store i64 2, i64{{.*}}* [[PTR1]], align 8 + // CHECK: [[PTR2:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]{{.*}}* [[BUF]], i32 0, i32 2 + // CHECK: store double 3.0{{[^,]*}}, double{{.*}}* [[PTR2]], align 8 + // CHECK: [[BUF_CAST:%[0-9]+]] = bitcast [[SIMPLE_PRINTF_TY]]{{.*}}* [[BUF]] to i8{{.*}}* + // CHECK: [[RET:%[0-9]+]] = call i32 @vprintf(i8{{.*}}* [[FMT]], i8{{.*}}* [[BUF_CAST]]) // CHECK: ret i32 [[RET]] return printf(fmt, 1, 2ll, 3.0); } +// CHECK-LABEL: define void @_Z11CheckNoArgsv() __device__ void CheckNoArgs() { - // CHECK: call i32 @vprintf({{.*}}, i8* null){{$}} + // CHECK: call i32 @vprintf({{.*}}, i8{{.*}}* null){{$}} printf("hello, world!"); } // Check that printf's alloca happens in the entry block, not inside the if // statement. __device__ bool foo(); +// CHECK-LABEL: define void @_Z25CheckAllocaIsInEntryBlockv() __device__ void CheckAllocaIsInEntryBlock() { // CHECK: alloca %printf_args // CHECK: call {{.*}} @_Z3foov() Index: test/CodeGenCUDA/llvm-used.cu =================================================================== --- test/CodeGenCUDA/llvm-used.cu +++ test/CodeGenCUDA/llvm-used.cu @@ -1,8 +1,10 @@ -// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx64-unknown-unknown | FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx64-unknown-unknown | FileCheck -check-prefix=NVPTX %s +// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple amdgcn-amd-amdhsa | FileCheck -check-prefix=AMDGCN %s // Make sure we emit the proper addrspacecast for llvm.used. PR22383 exposed an // issue where we were generating a bitcast instead of an addrspacecast. 
-// CHECK: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast ([0 x i32] addrspace(1)* @a to i8 addrspace(1)*) to i8*)], section "llvm.metadata" +// NVPTX: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast ([0 x i32] addrspace(1)* @a to i8 addrspace(1)*) to i8*)], section "llvm.metadata" +// AMDGCN: @llvm.used = appending global [1 x i8 addrspace(4)*] [i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ([0 x i32] addrspace(1)* @a to i8 addrspace(1)*) to i8 addrspace(4)*)], section "llvm.metadata" __attribute__((device)) __attribute__((__used__)) int a[] = {}; Index: test/CodeGenCUDA/kernel-args-alignment.cu =================================================================== --- test/CodeGenCUDA/kernel-args-alignment.cu +++ test/CodeGenCUDA/kernel-args-alignment.cu @@ -1,8 +1,11 @@ // RUN: %clang_cc1 --std=c++11 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | \ -// RUN: FileCheck -check-prefix HOST -check-prefix CHECK %s +// RUN: FileCheck -check-prefixes=HOST,CHECK %s // RUN: %clang_cc1 --std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda \ -// RUN: -emit-llvm -o - %s | FileCheck -check-prefix DEVICE -check-prefix CHECK %s +// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=DEVICE,CHECK,NVPTX %s + +// RUN: %clang_cc1 --std=c++11 -fcuda-is-device -triple amdgcn-amd-amdhsa \ +// RUN: -emit-llvm -o - %s -DAMDGCN | FileCheck -check-prefixes=DEVICE,CHECK,AMDGCN %s #include "Inputs/cuda.h" @@ -18,7 +21,9 @@ // Clang should generate a packed LLVM struct for S (denoted by the <>s), // otherwise this test isn't interesting. 
-// CHECK: %struct.S = type <{ i32*, i8, %struct.U, [5 x i8] }> +// HOST: %struct.S = type <{ i32*, i8, %struct.U, [5 x i8] }> +// NVPTX: %struct.S = type <{ i32*, i8, %struct.U, [5 x i8] }> +// AMDGCN: %struct.S = type <{ i32 addrspace(4)*, i8, %struct.U, [5 x i8] }> static_assert(alignof(S) == 8, "Unexpected alignment."); @@ -32,5 +37,6 @@ // HOST: call i32 @cudaSetupArgument({{[^,]*}}, i64 8, i64 24) // DEVICE-LABEL: @_Z6kernelc1SPi -// DEVICE-SAME: i8{{[^,]*}}, %struct.S* byval align 8{{[^,]*}}, i32* +// NVPTX-SAME: i8{{[^,]*}}, %struct.S* byval align 8{{[^,]*}}, i32* +// AMDGCN-SAME: i8{{[^,]*}}, %struct.S* byval align 8{{[^,]*}}, i32 addrspace(4)* __global__ void kernel(char a, S s, int *b) {} Index: test/CodeGenCUDA/function-overload.cu =================================================================== --- test/CodeGenCUDA/function-overload.cu +++ test/CodeGenCUDA/function-overload.cu @@ -8,6 +8,8 @@ // RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -o - %s \ // RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s #include "Inputs/cuda.h" Index: test/CodeGenCUDA/filter-decl.cu =================================================================== --- test/CodeGenCUDA/filter-decl.cu +++ test/CodeGenCUDA/filter-decl.cu @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-HOST %s -// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm %s -o - -fcuda-is-device | FileCheck -check-prefix=CHECK-DEVICE %s +// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm %s -o - -fcuda-is-device | FileCheck -check-prefixes=CHECK-DEVICE,ITANIUM %s +// RUN: %clang_cc1 -triple amdgcn -emit-llvm %s -o - -fcuda-is-device | FileCheck 
-check-prefixes=CHECK-DEVICE,AMDGCN %s #include "Inputs/cuda.h" @@ -10,15 +11,18 @@ __asm__("file scope asm is host only"); // CHECK-HOST: constantdata = internal global -// CHECK-DEVICE: constantdata = externally_initialized global +// ITANIUM: constantdata = externally_initialized global +// AMDGCN: constantdata = addrspace(2) externally_initialized global __constant__ char constantdata[256]; // CHECK-HOST: devicedata = internal global -// CHECK-DEVICE: devicedata = externally_initialized global +// ITANIUM: devicedata = externally_initialized global +// AMDGCN: devicedata = addrspace(1) externally_initialized global __device__ char devicedata[256]; // CHECK-HOST: shareddata = internal global -// CHECK-DEVICE: shareddata = global +// ITANIUM: shareddata = global +// AMDGCN: shareddata = addrspace(3) global __shared__ char shareddata[256]; // CHECK-HOST: hostdata = global Index: test/CodeGenCUDA/device-vtable.cu =================================================================== --- test/CodeGenCUDA/device-vtable.cu +++ test/CodeGenCUDA/device-vtable.cu @@ -10,6 +10,8 @@ // RUN: | FileCheck %s -check-prefix=CHECK-HOST -check-prefix=CHECK-BOTH // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -o - %s \ // RUN: | FileCheck %s -check-prefix=CHECK-DEVICE -check-prefix=CHECK-BOTH +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -o - %s \ +// RUN: | FileCheck %s -check-prefix=CHECK-DEVICE -check-prefix=CHECK-BOTH #include "Inputs/cuda.h" Index: test/CodeGenCUDA/device-var-init.cu =================================================================== --- test/CodeGenCUDA/device-var-init.cu +++ test/CodeGenCUDA/device-var-init.cu @@ -4,7 +4,10 @@ // variables, but accept empty constructors allowed by CUDA. 
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ -// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck %s +// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,NVPTX %s + +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 \ +// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCN %s #ifdef __clang__ #include "Inputs/cuda.h" @@ -18,7 +21,8 @@ __shared__ int s_v; // CHECK: @s_v = addrspace(3) global i32 undef, __constant__ int c_v; -// CHECK: addrspace(4) externally_initialized global i32 0, +// NVPTX: addrspace(4) externally_initialized global i32 0, +// AMDGCN: addrspace(2) externally_initialized global i32 0, __device__ int d_v_i = 1; // CHECK: @d_v_i = addrspace(1) externally_initialized global i32 1, @@ -29,81 +33,92 @@ __shared__ T s_t; // CHECK: @s_t = addrspace(3) global %struct.T undef, __constant__ T c_t; -// CHECK: @c_t = addrspace(4) externally_initialized global %struct.T zeroinitializer, +// NVPTX: @c_t = addrspace(4) externally_initialized global %struct.T zeroinitializer, +// AMDGCN: @c_t = addrspace(2) externally_initialized global %struct.T zeroinitializer, __device__ T d_t_i = {2}; // CHECK: @d_t_i = addrspace(1) externally_initialized global %struct.T { i32 2 }, __constant__ T c_t_i = {2}; -// CHECK: @c_t_i = addrspace(4) externally_initialized global %struct.T { i32 2 }, +// NVPTX: @c_t_i = addrspace(4) externally_initialized global %struct.T { i32 2 }, +// AMDGCN: @c_t_i = addrspace(2) externally_initialized global %struct.T { i32 2 }, // empty constructor __device__ EC d_ec; // CHECK: @d_ec = addrspace(1) externally_initialized global %struct.EC zeroinitializer, __shared__ EC s_ec; // CHECK: @s_ec = addrspace(3) global %struct.EC undef, __constant__ EC c_ec; -// CHECK: @c_ec = addrspace(4) externally_initialized global %struct.EC zeroinitializer, +// NVPTX: @c_ec = addrspace(4) externally_initialized global %struct.EC 
zeroinitializer, +// AMDGCN: @c_ec = addrspace(2) externally_initialized global %struct.EC zeroinitializer, // empty destructor __device__ ED d_ed; // CHECK: @d_ed = addrspace(1) externally_initialized global %struct.ED zeroinitializer, __shared__ ED s_ed; // CHECK: @s_ed = addrspace(3) global %struct.ED undef, __constant__ ED c_ed; -// CHECK: @c_ed = addrspace(4) externally_initialized global %struct.ED zeroinitializer, +// NVPTX: @c_ed = addrspace(4) externally_initialized global %struct.ED zeroinitializer, +// AMDGCN: @c_ed = addrspace(2) externally_initialized global %struct.ED zeroinitializer, __device__ ECD d_ecd; // CHECK: @d_ecd = addrspace(1) externally_initialized global %struct.ECD zeroinitializer, __shared__ ECD s_ecd; // CHECK: @s_ecd = addrspace(3) global %struct.ECD undef, __constant__ ECD c_ecd; -// CHECK: @c_ecd = addrspace(4) externally_initialized global %struct.ECD zeroinitializer, +// NVPTX: @c_ecd = addrspace(4) externally_initialized global %struct.ECD zeroinitializer, +// AMDGCN: @c_ecd = addrspace(2) externally_initialized global %struct.ECD zeroinitializer, // empty templated constructor -- allowed with no arguments __device__ ETC d_etc; // CHECK: @d_etc = addrspace(1) externally_initialized global %struct.ETC zeroinitializer, __shared__ ETC s_etc; // CHECK: @s_etc = addrspace(3) global %struct.ETC undef, __constant__ ETC c_etc; -// CHECK: @c_etc = addrspace(4) externally_initialized global %struct.ETC zeroinitializer, +// NVPTX: @c_etc = addrspace(4) externally_initialized global %struct.ETC zeroinitializer, +// AMDGCN: @c_etc = addrspace(2) externally_initialized global %struct.ETC zeroinitializer, __device__ NCFS d_ncfs; // CHECK: @d_ncfs = addrspace(1) externally_initialized global %struct.NCFS { i32 3 } __constant__ NCFS c_ncfs; -// CHECK: @c_ncfs = addrspace(4) externally_initialized global %struct.NCFS { i32 3 } +// NVPTX: @c_ncfs = addrspace(4) externally_initialized global %struct.NCFS { i32 3 } +// AMDGCN: @c_ncfs = addrspace(2) 
externally_initialized global %struct.NCFS { i32 3 } // Regular base class -- allowed __device__ T_B_T d_t_b_t; // CHECK: @d_t_b_t = addrspace(1) externally_initialized global %struct.T_B_T zeroinitializer, __shared__ T_B_T s_t_b_t; // CHECK: @s_t_b_t = addrspace(3) global %struct.T_B_T undef, __constant__ T_B_T c_t_b_t; -// CHECK: @c_t_b_t = addrspace(4) externally_initialized global %struct.T_B_T zeroinitializer, +// NVPTX: @c_t_b_t = addrspace(4) externally_initialized global %struct.T_B_T zeroinitializer, +// AMDGCN: @c_t_b_t = addrspace(2) externally_initialized global %struct.T_B_T zeroinitializer, // Incapsulated object of allowed class -- allowed __device__ T_F_T d_t_f_t; // CHECK: @d_t_f_t = addrspace(1) externally_initialized global %struct.T_F_T zeroinitializer, __shared__ T_F_T s_t_f_t; // CHECK: @s_t_f_t = addrspace(3) global %struct.T_F_T undef, __constant__ T_F_T c_t_f_t; -// CHECK: @c_t_f_t = addrspace(4) externally_initialized global %struct.T_F_T zeroinitializer, +// NVPTX: @c_t_f_t = addrspace(4) externally_initialized global %struct.T_F_T zeroinitializer, +// AMDGCN: @c_t_f_t = addrspace(2) externally_initialized global %struct.T_F_T zeroinitializer, // array of allowed objects -- allowed __device__ T_FA_T d_t_fa_t; // CHECK: @d_t_fa_t = addrspace(1) externally_initialized global %struct.T_FA_T zeroinitializer, __shared__ T_FA_T s_t_fa_t; // CHECK: @s_t_fa_t = addrspace(3) global %struct.T_FA_T undef, __constant__ T_FA_T c_t_fa_t; -// CHECK: @c_t_fa_t = addrspace(4) externally_initialized global %struct.T_FA_T zeroinitializer, +// NVPTX: @c_t_fa_t = addrspace(4) externally_initialized global %struct.T_FA_T zeroinitializer, +// AMDGCN: @c_t_fa_t = addrspace(2) externally_initialized global %struct.T_FA_T zeroinitializer, // Calling empty base class initializer is OK __device__ EC_I_EC d_ec_i_ec; // CHECK: @d_ec_i_ec = addrspace(1) externally_initialized global %struct.EC_I_EC zeroinitializer, __shared__ EC_I_EC s_ec_i_ec; // CHECK: @s_ec_i_ec = 
addrspace(3) global %struct.EC_I_EC undef, __constant__ EC_I_EC c_ec_i_ec; -// CHECK: @c_ec_i_ec = addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer, +// NVPTX: @c_ec_i_ec = addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer, +// AMDGCN: @c_ec_i_ec = addrspace(2) externally_initialized global %struct.EC_I_EC zeroinitializer, // We should not emit global initializers for device-side variables. // CHECK-NOT: @__cxx_global_var_init @@ -114,82 +129,111 @@ T t; // CHECK-NOT: call EC ec; - // CHECK: call void @_ZN2ECC1Ev(%struct.EC* %ec) + // NVPTX: call void @_ZN2ECC1Ev(%struct.EC* %ec) + // AMDGCN: call void @_ZN2ECC1Ev(%struct.EC addrspace(4)* %ec) ED ed; // CHECK-NOT: call ECD ecd; - // CHECK: call void @_ZN3ECDC1Ev(%struct.ECD* %ecd) + // NVPTX: call void @_ZN3ECDC1Ev(%struct.ECD* %ecd) + // AMDGCN: call void @_ZN3ECDC1Ev(%struct.ECD addrspace(4)* %ecd) ETC etc; - // CHECK: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %etc) + // NVPTX: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %etc) + // AMDGCN: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC addrspace(4)* %etc) UC uc; // undefined constructor -- not allowed - // CHECK: call void @_ZN2UCC1Ev(%struct.UC* %uc) + // NVPTX: call void @_ZN2UCC1Ev(%struct.UC* %uc) + // AMDGCN: call void @_ZN2UCC1Ev(%struct.UC addrspace(4)* %uc) UD ud; // undefined destructor -- not allowed // CHECK-NOT: call ECI eci; // empty constructor w/ initializer list -- not allowed - // CHECK: call void @_ZN3ECIC1Ev(%struct.ECI* %eci) + // NVPTX: call void @_ZN3ECIC1Ev(%struct.ECI* %eci) + // AMDGCN: call void @_ZN3ECIC1Ev(%struct.ECI addrspace(4)* %eci) NEC nec; // non-empty constructor -- not allowed - // CHECK: call void @_ZN3NECC1Ev(%struct.NEC* %nec) + // NVPTX: call void @_ZN3NECC1Ev(%struct.NEC* %nec) + // AMDGCN: call void @_ZN3NECC1Ev(%struct.NEC addrspace(4)* %nec) // non-empty destructor -- not allowed NED ned; // no-constructor, virtual method -- not allowed - // CHECK: call void 
@_ZN3NCVC1Ev(%struct.NCV* %ncv) + // NVPTX: call void @_ZN3NCVC1Ev(%struct.NCV* %ncv) + // AMDGCN: call void @_ZN3NCVC1Ev(%struct.NCV addrspace(4)* %ncv) NCV ncv; // CHECK-NOT: call VD vd; - // CHECK: call void @_ZN2VDC1Ev(%struct.VD* %vd) + // NVPTX: call void @_ZN2VDC1Ev(%struct.VD* %vd) + // AMDGCN: call void @_ZN2VDC1Ev(%struct.VD addrspace(4)* %vd) NCF ncf; - // CHECK: call void @_ZN3NCFC1Ev(%struct.NCF* %ncf) + // NVPTX: call void @_ZN3NCFC1Ev(%struct.NCF* %ncf) + // AMDGCN: call void @_ZN3NCFC1Ev(%struct.NCF addrspace(4)* %ncf) NCFS ncfs; - // CHECK: call void @_ZN4NCFSC1Ev(%struct.NCFS* %ncfs) + // NVPTX: call void @_ZN4NCFSC1Ev(%struct.NCFS* %ncfs) + // AMDGCN: call void @_ZN4NCFSC1Ev(%struct.NCFS addrspace(4)* %ncfs) UTC utc; - // CHECK: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %utc) + // NVPTX: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %utc) + // AMDGCN: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC addrspace(4)* %utc) NETC netc; - // CHECK: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc) + // NVPTX: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc) + // AMDGCN: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC addrspace(4)* %netc) T_B_T t_b_t; // CHECK-NOT: call T_F_T t_f_t; // CHECK-NOT: call T_FA_T t_fa_t; // CHECK-NOT: call EC_I_EC ec_i_ec; - // CHECK: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec) + // NVPTX: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec) + // AMDGCN: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC addrspace(4)* %ec_i_ec) EC_I_EC1 ec_i_ec1; - // CHECK: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1) + // NVPTX: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1) + // AMDGCN: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1 addrspace(4)* %ec_i_ec1) T_V_T t_v_t; - // CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t) + // NVPTX: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t) + // AMDGCN: call void @_ZN5T_V_TC1Ev(%struct.T_V_T addrspace(4)* %t_v_t) T_B_NEC t_b_nec; - // CHECK: call void 
@_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec) + // NVPTX: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec) + // AMDGCN: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC addrspace(4)* %t_b_nec) T_F_NEC t_f_nec; - // CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec) + // NVPTX: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec) + // AMDGCN: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC addrspace(4)* %t_f_nec) T_FA_NEC t_fa_nec; - // CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec) + // NVPTX: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec) + // AMDGCN: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC addrspace(4)* %t_fa_nec) T_B_NED t_b_ned; // CHECK-NOT: call T_F_NED t_f_ned; // CHECK-NOT: call T_FA_NED t_fa_ned; // CHECK-NOT: call static __shared__ EC s_ec; - // CHECK-NOT: call void @_ZN2ECC1Ev(%struct.EC* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC*)) + // NVPTX-NOT: call void @_ZN2ECC1Ev(%struct.EC* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC*)) + // AMDGCN-NOT: call void @_ZN2ECC1Ev(%struct.EC addrspace(4)* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC addrspace(4)*)) static __shared__ ETC s_etc; - // CHECK-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC*)) + // NVPTX-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC*)) + // AMDGCN-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC addrspace(4)* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC addrspace(4)*)) // anchor point separating constructors and destructors df(); // CHECK: call void @_Z2dfv() // Verify that we only call non-empty destructors - // CHECK-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %t_fa_ned) - // CHECK-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %t_f_ned) - // CHECK-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* 
%t_b_ned) - // CHECK-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %vd) - // CHECK-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %ned) - // CHECK-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %ud) - // CHECK-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %ecd) - // CHECK-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %ed) + // NVPTX-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %t_fa_ned) + // NVPTX-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %t_f_ned) + // NVPTX-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %t_b_ned) + // NVPTX-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %vd) + // NVPTX-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %ned) + // NVPTX-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %ud) + // NVPTX-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %ecd) + // NVPTX-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %ed) + + // AMDGCN-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED addrspace(4)* %t_fa_ned) + // AMDGCN-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED addrspace(4)* %t_f_ned) + // AMDGCN-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED addrspace(4)* %t_b_ned) + // AMDGCN-NEXT: call void @_ZN2VDD1Ev(%struct.VD addrspace(4)* %vd) + // AMDGCN-NEXT: call void @_ZN3NEDD1Ev(%struct.NED addrspace(4)* %ned) + // AMDGCN-NEXT: call void @_ZN2UDD1Ev(%struct.UD addrspace(4)* %ud) + // AMDGCN-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD addrspace(4)* %ecd) + // AMDGCN-NEXT: call void @_ZN2EDD1Ev(%struct.ED addrspace(4)* %ed) // CHECK-NEXT: ret void } Index: test/CodeGenCUDA/convergent.cu =================================================================== --- test/CodeGenCUDA/convergent.cu +++ test/CodeGenCUDA/convergent.cu @@ -2,6 +2,9 @@ // REQUIRES: nvptx-registered-target // RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \ +// RUN: -disable-llvm-passes -o - %s -DNVPTX | FileCheck -check-prefixes=DEVICE,NVPTX %s + +// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn -emit-llvm \ // RUN: -disable-llvm-passes -o - %s | FileCheck -check-prefix DEVICE %s // RUN: %clang_cc1 
-triple x86_64-unknown-linux-gnu -emit-llvm \ @@ -25,9 +28,11 @@ __host__ __device__ void bar() { // DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]] baz(); - // DEVICE: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]] + #ifdef NVPTX + // NVPTX: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]] int x; asm ("trap;" : "=l"(x)); + #endif // DEVICE: call void asm sideeffect "trap;", ""() [[ASM_ATTR:#[0-9]+]] asm volatile ("trap;"); } Index: test/CodeGenCUDA/address-spaces.cu =================================================================== --- test/CodeGenCUDA/address-spaces.cu +++ test/CodeGenCUDA/address-spaces.cu @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown | FileCheck %s +// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple nvptx-unknown-unknown | FileCheck --check-prefixes=NVPTX,CHECK %s +// RUN: %clang_cc1 -emit-llvm %s -o - -fcuda-is-device -triple amdgcn | FileCheck --check-prefixes=AMDGCN,CHECK %s // Verifies Clang emits correct address spaces and addrspacecast instructions // for CUDA code. 
@@ -8,7 +9,8 @@ // CHECK: @i = addrspace(1) externally_initialized global __device__ int i; -// CHECK: @j = addrspace(4) externally_initialized global +// AMDGCN: @j = addrspace(2) externally_initialized global +// NVPTX: @j = addrspace(4) externally_initialized global __constant__ int j; // CHECK: @k = addrspace(3) global @@ -27,17 +29,21 @@ // CHECK: @b = addrspace(3) global float undef __device__ void foo() { - // CHECK: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*) + // NVPTX: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*) + // AMDGCN: load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @i to i32 addrspace(4)*) i++; - // CHECK: load i32, i32* addrspacecast (i32 addrspace(4)* @j to i32*) + // NVPTX: load i32, i32* addrspacecast (i32 addrspace(4)* @j to i32*) + // AMDGCN: load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(2)* @j to i32 addrspace(4)*) j++; - // CHECK: load i32, i32* addrspacecast (i32 addrspace(3)* @k to i32*) + // NVPTX: load i32, i32* addrspacecast (i32 addrspace(3)* @k to i32*) + // AMDGCN: load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @k to i32 addrspace(4)*) k++; __shared__ int lk; - // CHECK: load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ3foovE2lk to i32*) + // NVPTX: load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ3foovE2lk to i32*) + // AMDGCN: load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @_ZZ3foovE2lk to i32 addrspace(4)*) lk++; } @@ -47,8 +53,9 @@ ap->data1 = 1; ap->data2 = 2; } -// CHECK: define void @_Z5func0v() -// CHECK: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %ap +// CHECK-LABEL: define void @_Z5func0v() +// NVPTX: store %struct.MyStruct* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to %struct.MyStruct*), %struct.MyStruct** %ap +// AMDGCN: store %struct.MyStruct addrspace(4)* addrspacecast (%struct.MyStruct addrspace(3)* @_ZZ5func0vE1a to 
%struct.MyStruct addrspace(4)*), %struct.MyStruct addrspace(4)* addrspace(4)* %ap __device__ void callee(float *ap) { *ap = 1.0f; @@ -58,37 +65,42 @@ __shared__ float a; callee(&a); // implicit cast from parameters } -// CHECK: define void @_Z5func1v() -// CHECK: call void @_Z6calleePf(float* addrspacecast (float addrspace(3)* @_ZZ5func1vE1a to float*)) +// CHECK-LABEL: define void @_Z5func1v() +// NVPTX: call void @_Z6calleePf(float* addrspacecast (float addrspace(3)* @_ZZ5func1vE1a to float*)) +// AMDGCN: call void @_Z6calleePf(float addrspace(4)* addrspacecast (float addrspace(3)* @_ZZ5func1vE1a to float addrspace(4)*)) __device__ void func2() { __shared__ float a[256]; float *ap = &a[128]; // implicit cast from a decayed array *ap = 1.0f; } -// CHECK: define void @_Z5func2v() -// CHECK: store float* getelementptr inbounds ([256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i32 0, i32 128), float** %ap - +// CHECK-LABEL: define void @_Z5func2v() +// NVPTX: store float* getelementptr inbounds ([256 x float], [256 x float]* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float]*), i32 0, i32 128), float** %ap +// AMDGCN: store float addrspace(4)* getelementptr inbounds ([256 x float], [256 x float] addrspace(4)* addrspacecast ([256 x float] addrspace(3)* @_ZZ5func2vE1a to [256 x float] addrspace(4)*), i64 0, i64 128), float addrspace(4)* addrspace(4)* %ap __device__ void func3() { __shared__ float a; float *ap = reinterpret_cast<float *>(&a); // explicit cast *ap = 1.0f; } -// CHECK: define void @_Z5func3v() -// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %ap +// CHECK-LABEL: define void @_Z5func3v() +// NVPTX: store float* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float*), float** %ap +// AMDGCN: store float addrspace(4)* addrspacecast (float addrspace(3)* @_ZZ5func3vE1a to float addrspace(4)*), float addrspace(4)* addrspace(4)* 
%ap __device__ void func4() { __shared__ float a; float *ap = (float *)&a; // explicit c-style cast *ap = 1.0f; } -// CHECK: define void @_Z5func4v() -// CHECK: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %ap +// CHECK-LABEL: define void @_Z5func4v() +// NVPTX: store float* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float*), float** %ap +// AMDGCN: store float addrspace(4)* addrspacecast (float addrspace(3)* @_ZZ5func4vE1a to float addrspace(4)*), float addrspace(4)* addrspace(4)* %ap __shared__ float b; __device__ float *func5() { return &b; // implicit cast from a return value } -// CHECK: define float* @_Z5func5v() -// CHECK: ret float* addrspacecast (float addrspace(3)* @b to float*) +// NVPTX-LABEL: define float* @_Z5func5v() +// AMDGCN-LABEL: define float addrspace(4)* @_Z5func5v() +// NVPTX: ret float* addrspacecast (float addrspace(3)* @b to float*) +// AMDGCN: ret float addrspace(4)* addrspacecast (float addrspace(3)* @b to float addrspace(4)*) Index: lib/CodeGen/ItaniumCXXABI.cpp =================================================================== --- lib/CodeGen/ItaniumCXXABI.cpp +++ lib/CodeGen/ItaniumCXXABI.cpp @@ -1108,7 +1108,7 @@ if (!Record->hasTrivialDestructor()) { CXXDestructorDecl *DtorD = Record->getDestructor(); Dtor = CGM.getAddrOfCXXStructor(DtorD, StructorType::Complete); - Dtor = llvm::ConstantExpr::getBitCast(Dtor, CGM.Int8PtrTy); + Dtor = llvm::ConstantExpr::getPointerCast(Dtor, CGM.Int8PtrTy); } } if (!Dtor) Dtor = llvm::Constant::getNullValue(CGM.Int8PtrTy); @@ -1223,7 +1223,8 @@ auto *ClassDecl = cast<CXXRecordDecl>(SrcRecordTy->getAs<RecordType>()->getDecl()); llvm::Value *Value = - CGF.GetVTablePtr(ThisPtr, StdTypeInfoPtrTy->getPointerTo(), ClassDecl); + CGF.GetVTablePtr(ThisPtr, CGF.getTypes().getDefaultPointerTo( + StdTypeInfoPtrTy), ClassDecl); // Load the type info. 
Value = CGF.Builder.CreateConstInBoundsGEP1_64(Value, -1ULL); @@ -1992,7 +1993,8 @@ CGM.getDataLayout().getABITypeAlignment(guardTy)); } } - llvm::PointerType *guardPtrTy = guardTy->getPointerTo(); + llvm::PointerType *guardPtrTy = guardTy->getPointerTo( + getContext().getTargetDefaultAddressSpace()); // Create the guard variable if we don't already have it (as we // might if we're double-emitting this function body). @@ -2010,7 +2012,10 @@ guard = new llvm::GlobalVariable(CGM.getModule(), guardTy, false, var->getLinkage(), llvm::ConstantInt::get(guardTy, 0), - guardName.str()); + guardName.str(), + /* InsertBefore */ nullptr, + llvm::GlobalValue::NotThreadLocal, + getContext().getTargetGlobalAddressSpace()); guard->setVisibility(var->getVisibility()); // If the variable is thread-local, so is its guard variable. guard->setThreadLocalMode(var->getThreadLocalMode()); @@ -2171,8 +2176,8 @@ llvm::Value *args[] = { llvm::ConstantExpr::getBitCast(dtor, dtorTy), - llvm::ConstantExpr::getBitCast(addr, CGF.Int8PtrTy), - handle + llvm::ConstantExpr::getPointerCast(addr, CGF.Int8PtrTy), + llvm::ConstantExpr::getPointerCast(handle, CGF.Int8PtrTy) }; CGF.EmitNounwindRuntimeCall(atexit, args); } @@ -2584,7 +2589,7 @@ } } - return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); + return llvm::ConstantExpr::getPointerCast(GV, CGM.Int8PtrTy); } /// TypeInfoIsInStandardLibrary - Given a builtin type, returns whether the type @@ -2913,7 +2918,7 @@ llvm::Constant *Two = llvm::ConstantInt::get(PtrDiffTy, 2); VTable = llvm::ConstantExpr::getInBoundsGetElementPtr(CGM.Int8PtrTy, VTable, Two); - VTable = llvm::ConstantExpr::getBitCast(VTable, CGM.Int8PtrTy); + VTable = llvm::ConstantExpr::getPointerCast(VTable, CGM.Int8PtrTy); Fields.push_back(VTable); } @@ -2986,7 +2991,7 @@ assert(!OldGV->hasAvailableExternallyLinkage() && "available_externally typeinfos not yet implemented"); - return llvm::ConstantExpr::getBitCast(OldGV, CGM.Int8PtrTy); + return 
llvm::ConstantExpr::getPointerCast(OldGV, CGM.Int8PtrTy); } // Check if there is already an external RTTI descriptor for this type. @@ -3022,7 +3027,7 @@ TypeNameField = llvm::ConstantExpr::getIntToPtr(TypeNameField, CGM.Int8PtrTy); } else { - TypeNameField = llvm::ConstantExpr::getBitCast(TypeName, CGM.Int8PtrTy); + TypeNameField = llvm::ConstantExpr::getPointerCast(TypeName, CGM.Int8PtrTy); } Fields.push_back(TypeNameField); @@ -3177,7 +3182,7 @@ } } - return llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy); + return llvm::ConstantExpr::getPointerCast(GV, CGM.Int8PtrTy); } /// BuildObjCObjectTypeInfo - Build the appropriate kind of type_info Index: lib/CodeGen/CodeGenTypes.h =================================================================== --- lib/CodeGen/CodeGenTypes.h +++ lib/CodeGen/CodeGenTypes.h @@ -182,6 +182,13 @@ /// ConvertType - Convert type T into a llvm::Type. llvm::Type *ConvertType(QualType T); + /// Get a pointer type pointing to the given QualType \p T. + llvm::PointerType *getPointerTypeTo(QualType T = QualType()); + + /// Get a pointer type pointing to the given llvm::Type \p T in the default + /// target address space. + llvm::PointerType *getDefaultPointerTo(llvm::Type *T); + /// \brief Converts the GlobalDecl into an llvm::Type. This should be used /// when we know the target of the function we want to convert. This is /// because some functions (explicitly, those with pass_object_size Index: lib/CodeGen/CodeGenTypes.cpp =================================================================== --- lib/CodeGen/CodeGenTypes.cpp +++ lib/CodeGen/CodeGenTypes.cpp @@ -375,6 +375,14 @@ return ResultType; } +llvm::PointerType *CodeGenTypes::getPointerTypeTo(QualType T) { + return ConvertType(T)->getPointerTo(Context.getTargetAddressSpace(T)); +} + +llvm::PointerType *CodeGenTypes::getDefaultPointerTo(llvm::Type *T) { + return T->getPointerTo(Context.getTargetDefaultAddressSpace()); +} + /// ConvertType - Convert the specified type to its LLVM form. 
llvm::Type *CodeGenTypes::ConvertType(QualType T) { T = Context.getCanonicalType(T); Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -47,6 +47,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" @@ -90,6 +91,7 @@ VMContext(M.getContext()), Types(*this), VTables(*this), SanitizerMD(new SanitizerMetadata(*this)) { + unsigned DefaultTargetAddressSpace = Target.getDefaultTargetAddressSpace(LangOpts); // Initialize the type cache. llvm::LLVMContext &LLVMContext = M.getContext(); VoidTy = llvm::Type::getVoidTy(LLVMContext); @@ -99,18 +101,18 @@ Int64Ty = llvm::Type::getInt64Ty(LLVMContext); FloatTy = llvm::Type::getFloatTy(LLVMContext); DoubleTy = llvm::Type::getDoubleTy(LLVMContext); - PointerWidthInBits = C.getTargetInfo().getPointerWidth(0); + PointerWidthInBits = C.getTargetInfo().getPointerWidth(DefaultTargetAddressSpace); PointerAlignInBytes = - C.toCharUnitsFromBits(C.getTargetInfo().getPointerAlign(0)).getQuantity(); + C.toCharUnitsFromBits(C.getTargetInfo().getPointerAlign(DefaultTargetAddressSpace)).getQuantity(); SizeSizeInBytes = C.toCharUnitsFromBits(C.getTargetInfo().getMaxPointerWidth()).getQuantity(); IntAlignInBytes = C.toCharUnitsFromBits(C.getTargetInfo().getIntAlign()).getQuantity(); IntTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getIntWidth()); IntPtrTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getMaxPointerWidth()); - Int8PtrTy = Int8Ty->getPointerTo(0); - Int8PtrPtrTy = Int8PtrTy->getPointerTo(0); + Int8PtrTy = Int8Ty->getPointerTo(DefaultTargetAddressSpace); + Int8PtrPtrTy = Int8PtrTy->getPointerTo(DefaultTargetAddressSpace); RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC(); BuiltinCC = 
getTargetCodeGenInfo().getABIInfo().getBuiltinCC(); @@ -750,7 +752,7 @@ ctor.addInt(Int32Ty, I.Priority); ctor.add(llvm::ConstantExpr::getBitCast(I.Initializer, CtorPFTy)); if (I.AssociatedData) - ctor.add(llvm::ConstantExpr::getBitCast(I.AssociatedData, VoidPtrTy)); + ctor.add(llvm::ConstantExpr::getPointerCast(I.AssociatedData, VoidPtrTy)); else ctor.addNullPointer(VoidPtrTy); ctor.finishAndAddTo(ctors); @@ -1418,10 +1420,13 @@ *LineNoCst = EmitAnnotationLineNo(L); // Create the ConstantStruct for the global annotation. + unsigned AS = GV->getType()->getAddressSpace(); + llvm::PointerType *I8PTy = (AS == Int8PtrTy->getAddressSpace()) ? + Int8PtrTy : Int8Ty->getPointerTo(AS); llvm::Constant *Fields[4] = { - llvm::ConstantExpr::getBitCast(GV, Int8PtrTy), - llvm::ConstantExpr::getBitCast(AnnoGV, Int8PtrTy), - llvm::ConstantExpr::getBitCast(UnitGV, Int8PtrTy), + llvm::ConstantExpr::getPointerCast(GV, I8PTy), + llvm::ConstantExpr::getPointerCast(AnnoGV, I8PTy), + llvm::ConstantExpr::getPointerCast(UnitGV, I8PTy), LineNoCst }; return llvm::ConstantStruct::getAnon(Fields); @@ -1548,7 +1553,7 @@ llvm::GlobalValue *Entry = GetGlobalValue(AA->getAliasee()); if (Entry) { unsigned AS = getContext().getTargetAddressSpace(VD->getType()); - auto Ptr = llvm::ConstantExpr::getBitCast(Entry, DeclTy->getPointerTo(AS)); + auto Ptr = llvm::ConstantExpr::getPointerCast(Entry, DeclTy->getPointerTo(AS)); return ConstantAddress(Ptr, Alignment); } @@ -1900,7 +1905,7 @@ /// GetOrCreateLLVMFunction - If the specified mangled name is not in the /// module, create and return an llvm Function with the specified type. If there /// is something in the module with the specified name, return it potentially -/// bitcasted to the right type. +/// casted to the right type. /// /// If D is non-null, it specifies a decl that correspond to this. This is used /// to set the attributes on the function when it is first created. 
@@ -1952,7 +1957,7 @@ // (If function is requested for a definition, we always need to create a new // function, not just return a bitcast.) if (!IsForDefinition) - return llvm::ConstantExpr::getBitCast(Entry, Ty->getPointerTo()); + return llvm::ConstantExpr::getPointerCast(Entry, Ty->getPointerTo()); } // This function doesn't have a complete type (for example, the return @@ -2060,7 +2065,7 @@ } llvm::Type *PTy = llvm::PointerType::getUnqual(Ty); - return llvm::ConstantExpr::getBitCast(F, PTy); + return llvm::ConstantExpr::getPointerCast(F, PTy); } /// GetAddrOfFunction - Return the address of the given function. If Ty is @@ -2189,7 +2194,7 @@ /// GetOrCreateLLVMGlobal - If the specified mangled name is not in the module, /// create and return an llvm GlobalVariable with the specified type. If there /// is something in the module with the specified name, return it potentially -/// bitcasted to the right type. +/// casted to the right type. /// /// If D is non-null, it specifies a decl that correspond to this. This is used /// to set the attributes on the global when it is first created. @@ -2237,14 +2242,10 @@ } } - // Make sure the result is of the correct type. - if (Entry->getType()->getAddressSpace() != Ty->getAddressSpace()) - return llvm::ConstantExpr::getAddrSpaceCast(Entry, Ty); - // (If global is requested for a definition, we always need to create a new // global, not just return a bitcast.) 
if (!IsForDefinition) - return llvm::ConstantExpr::getBitCast(Entry, Ty); + return llvm::ConstantExpr::getPointerCast(Entry, Ty); } unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace()); @@ -2260,7 +2261,7 @@ if (!Entry->use_empty()) { llvm::Constant *NewPtrForOldDecl = - llvm::ConstantExpr::getBitCast(GV, Entry->getType()); + llvm::ConstantExpr::getPointerCast(GV, Entry->getType()); Entry->replaceAllUsesWith(NewPtrForOldDecl); } @@ -2372,7 +2373,7 @@ if (!OldGV->use_empty()) { llvm::Constant *NewPtrForOldDecl = - llvm::ConstantExpr::getBitCast(GV, OldGV->getType()); + llvm::ConstantExpr::getPointerCast(GV, OldGV->getType()); OldGV->replaceAllUsesWith(NewPtrForOldDecl); } @@ -2452,6 +2453,12 @@ AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_shared); else AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_device); + } else if (getTriple().getArch() == llvm::Triple::amdgcn && + (LangOpts.CPlusPlus || LangOpts.OpenMP)) { + if (D && D->getType().isConstant(getContext())) + AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_constant); + else + AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_global); } return AddrSpace; @@ -2621,7 +2628,7 @@ // Replace all uses of the old global with the new global llvm::Constant *NewPtrForOldDecl = - llvm::ConstantExpr::getBitCast(GV, Entry->getType()); + llvm::ConstantExpr::getPointerCast(GV, Entry->getType()); Entry->replaceAllUsesWith(NewPtrForOldDecl); // Erase the old global, since it is no longer used. @@ -3116,7 +3123,7 @@ // Remove it and replace uses of it with the alias. GA->takeName(Entry); - Entry->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GA, + Entry->replaceAllUsesWith(llvm::ConstantExpr::getPointerCast(GA, Entry->getType())); Entry->eraseFromParent(); } else { @@ -3334,7 +3341,7 @@ if (isUTF16) // Cast the UTF16 string to the correct type. 
- Str = llvm::ConstantExpr::getBitCast(Str, Int8PtrTy); + Str = llvm::ConstantExpr::getPointerCast(Str, Int8PtrTy); Fields.add(Str); // String length. @@ -3442,7 +3449,7 @@ CodeGenModule &CGM, StringRef GlobalName, CharUnits Alignment) { // OpenCL v1.2 s6.5.3: a string literal is in the constant address space. - unsigned AddrSpace = 0; + unsigned AddrSpace = CGM.getContext().getTargetConstantAddressSpace(); if (CGM.getLangOpts().OpenCL) AddrSpace = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant); @@ -3760,6 +3767,9 @@ /// EmitTopLevelDecl - Emit code for a single top level declaration. void CodeGenModule::EmitTopLevelDecl(Decl *D) { + if (getenv("DBG_CG_DECL")) { + llvm::errs() << "decl: "; D->dump(); + } // Ignore dependent declarations. if (D->getDeclContext() && D->getDeclContext()->isDependentContext()) return; Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -377,7 +377,7 @@ }; /// i32s containing the indexes of the cleanup destinations. - llvm::AllocaInst *NormalCleanupDest; + llvm::Instruction *NormalCleanupDest; unsigned NextCleanupDestIndex; @@ -392,8 +392,8 @@ llvm::Value *ExceptionSlot; /// The selector slot. Under the MandatoryCleanup model, all landing pads - /// write the current selector value into this alloca. - llvm::AllocaInst *EHSelectorSlot; + /// write the current selector value into this instruction. + llvm::Instruction *EHSelectorSlot; /// A stack of exception code slots. Entering an __except block pushes a slot /// on the stack and leaving pops one. The __exception_code() intrinsic loads @@ -428,11 +428,11 @@ /// An i1 variable indicating whether or not the @finally is /// running for an exception. - llvm::AllocaInst *ForEHVar; + llvm::Instruction *ForEHVar; /// An i8* variable into which the exception pointer to rethrow /// has been saved. 
- llvm::AllocaInst *SavedExnVar; + llvm::Instruction *SavedExnVar; public: void enter(CodeGenFunction &CGF, const Stmt *Finally, @@ -1858,14 +1858,23 @@ AlignmentSource *Source = nullptr); LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy); + /// Create an alloca instruction. If the default address space is not 0, + /// insert addrspacecast instruction which casts the alloca instruction + /// to the default address space. + llvm::Instruction *CreateAlloca(llvm::Type *Ty, const Twine &Name = "tmp", + llvm::Instruction *InsertPos = nullptr); /// CreateTempAlloca - This creates a alloca and inserts it into the entry /// block. The caller is responsible for setting an appropriate alignment on - /// the alloca. - llvm::AllocaInst *CreateTempAlloca(llvm::Type *Ty, - const Twine &Name = "tmp"); + /// the alloca. If the default address space is not 0, insert addrspacecast. + llvm::Instruction *CreateTempAlloca(llvm::Type *Ty, + const Twine &Name = "tmp"); Address CreateTempAlloca(llvm::Type *Ty, CharUnits align, const Twine &Name = "tmp"); + /// Get alloca instruction operand of an addrspacecast instruction. + /// If \p Inst is alloca instruction, returns \p Inst; + llvm::AllocaInst *getAddrSpaceCastedAlloca(llvm::Instruction *Inst) const; + /// CreateDefaultAlignedTempAlloca - This creates an alloca with the /// default ABI alignment of the given LLVM type. 
/// Index: lib/CodeGen/CodeGenFunction.cpp =================================================================== --- lib/CodeGen/CodeGenFunction.cpp +++ lib/CodeGen/CodeGenFunction.cpp @@ -442,7 +442,7 @@ "callsite"); llvm::Value *args[] = { - llvm::ConstantExpr::getBitCast(CurFn, PointerTy), + llvm::ConstantExpr::getPointerCast(CurFn, PointerTy), CallSite }; Index: lib/CodeGen/CGVTables.cpp =================================================================== --- lib/CodeGen/CGVTables.cpp +++ lib/CodeGen/CGVTables.cpp @@ -550,7 +550,7 @@ return addOffsetConstant(component.getOffsetToTop()); case VTableComponent::CK_RTTI: - return builder.add(llvm::ConstantExpr::getBitCast(rtti, CGM.Int8PtrTy)); + return builder.add(llvm::ConstantExpr::getPointerCast(rtti, CGM.Int8PtrTy)); case VTableComponent::CK_FunctionPointer: case VTableComponent::CK_CompleteDtorPointer: @@ -594,7 +594,7 @@ llvm::Constant *fn = CGM.CreateRuntimeFunction(fnTy, name); if (auto f = dyn_cast<llvm::Function>(fn)) f->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - return llvm::ConstantExpr::getBitCast(fn, CGM.Int8PtrTy); + return llvm::ConstantExpr::getPointerCast(fn, CGM.Int8PtrTy); }; llvm::Constant *fnPtr; @@ -628,7 +628,7 @@ fnPtr = CGM.GetAddrOfFunction(GD, fnTy, /*ForVTable=*/true); } - fnPtr = llvm::ConstantExpr::getBitCast(fnPtr, CGM.Int8PtrTy); + fnPtr = llvm::ConstantExpr::getPointerCast(fnPtr, CGM.Int8PtrTy); builder.add(fnPtr); return; } Index: lib/CodeGen/CGVTT.cpp =================================================================== --- lib/CodeGen/CGVTT.cpp +++ lib/CodeGen/CGVTT.cpp @@ -84,7 +84,7 @@ VTable->getValueType(), VTable, Idxs, /*InBounds=*/true, /*InRangeIndex=*/1); - Init = llvm::ConstantExpr::getBitCast(Init, Int8PtrTy); + Init = llvm::ConstantExpr::getPointerCast(Init, Int8PtrTy); VTTComponents.push_back(Init); } Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ 
lib/CodeGen/CGOpenMPRuntime.cpp @@ -924,7 +924,7 @@ DefaultOpenMPPSource = CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); DefaultOpenMPPSource = - llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); + llvm::ConstantExpr::getPointerCast(DefaultOpenMPPSource, CGM.Int8PtrTy); } ConstantInitBuilder builder(CGM); @@ -2918,7 +2918,7 @@ llvm::Module &M = CGM.getModule(); // Make sure the address has the right type. - llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy); + llvm::Constant *AddrPtr = llvm::ConstantExpr::getPointerCast(ID, CGM.VoidPtrTy); // Create constant string with the name. llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); @@ -2928,7 +2928,7 @@ llvm::GlobalValue::InternalLinkage, StrPtrInit, ".omp_offloading.entry_name"); Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); + llvm::Constant *StrPtr = llvm::ConstantExpr::getPointerCast(Str, CGM.Int8PtrTy); // We can't have any padding between symbols, so we need to have 1-byte // alignment. @@ -4871,7 +4871,7 @@ // the device, because these functions will be entry points to the device. 
if (CGM.getLangOpts().OpenMPIsDevice) { - OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); + OutlinedFnID = llvm::ConstantExpr::getPointerCast(OutlinedFn, CGM.Int8PtrTy); OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); } else OutlinedFnID = new llvm::GlobalVariable( Index: lib/CodeGen/CGGPUBuiltin.cpp =================================================================== --- lib/CodeGen/CGGPUBuiltin.cpp +++ lib/CodeGen/CGGPUBuiltin.cpp @@ -21,9 +21,9 @@ using namespace clang; using namespace CodeGen; -static llvm::Function *GetVprintfDeclaration(llvm::Module &M) { - llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()), - llvm::Type::getInt8PtrTy(M.getContext())}; +static llvm::Function *GetVprintfDeclaration(CodeGenModule &CGM) { + auto &M = CGM.getModule(); + llvm::Type *ArgTypes[] = {CGM.Int8PtrTy, CGM.Int8PtrTy}; llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get( llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false); @@ -69,12 +69,13 @@ RValue CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue) { - assert(getTarget().getTriple().isNVPTX()); + assert(getTarget().getTriple().isNVPTX() || + (getTarget().getTriple().getArch() == llvm::Triple::amdgcn && + getLangOpts().CUDA)); assert(E->getBuiltinCallee() == Builtin::BIprintf); assert(E->getNumArgs() >= 1); // printf always has at least one arg. const llvm::DataLayout &DL = CGM.getDataLayout(); - llvm::LLVMContext &Ctx = CGM.getLLVMContext(); CallArgList Args; EmitCallArgs(Args, @@ -93,7 +94,7 @@ llvm::Value *BufferPtr; if (Args.size() <= 1) { // If there are no args, pass a null pointer to vprintf. 
- BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx)); + BufferPtr = llvm::ConstantPointerNull::get(CGM.Int8PtrTy); } else { llvm::SmallVector<llvm::Type *, 8> ArgTypes; for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) @@ -112,11 +113,11 @@ llvm::Value *Arg = Args[I].RV.getScalarVal(); Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlignment(Arg->getType())); } - BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx)); + BufferPtr = Builder.CreatePointerCast(Alloca, CGM.Int8PtrTy); } // Invoke vprintf and return. - llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule()); + llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM); return RValue::get( Builder.CreateCall(VprintfFunc, {Args[0].RV.getScalarVal(), BufferPtr})); } Index: lib/CodeGen/CGExprScalar.cpp =================================================================== --- lib/CodeGen/CGExprScalar.cpp +++ lib/CodeGen/CGExprScalar.cpp @@ -256,8 +256,15 @@ //===--------------------------------------------------------------------===// Value *Visit(Expr *E) { + if (getenv("DBG_CG_SCALAR_EXPR")) { + llvm::errs() << "Expr: "; E->dump(); + } ApplyDebugLocation DL(CGF, E); - return StmtVisitor<ScalarExprEmitter, Value*>::Visit(E); + auto Res = StmtVisitor<ScalarExprEmitter, Value*>::Visit(E); + if (getenv("DBG_CG_SCALAR_EXPR")) { + llvm::errs() << " => " << *Res << '\n'; + } + return Res; } Value *VisitStmt(Stmt *S) { Index: lib/CodeGen/CGExprConstant.cpp =================================================================== --- lib/CodeGen/CGExprConstant.cpp +++ lib/CodeGen/CGExprConstant.cpp @@ -1316,7 +1316,7 @@ if (!Offset->isNullValue()) { unsigned AS = C->getType()->getPointerAddressSpace(); llvm::Type *CharPtrTy = Int8Ty->getPointerTo(AS); - llvm::Constant *Casted = llvm::ConstantExpr::getBitCast(C, CharPtrTy); + llvm::Constant *Casted = llvm::ConstantExpr::getPointerCast(C, CharPtrTy); Casted = llvm::ConstantExpr::getGetElementPtr(Int8Ty, 
Casted, Offset); C = llvm::ConstantExpr::getPointerCast(Casted, C->getType()); } Index: lib/CodeGen/CGExprCXX.cpp =================================================================== --- lib/CodeGen/CGExprCXX.cpp +++ lib/CodeGen/CGExprCXX.cpp @@ -2024,8 +2024,7 @@ } llvm::Value *CodeGenFunction::EmitCXXTypeidExpr(const CXXTypeidExpr *E) { - llvm::Type *StdTypeInfoPtrTy = - ConvertType(E->getType())->getPointerTo(); + llvm::Type *StdTypeInfoPtrTy = getTypes().getPointerTypeTo(E->getType()); if (E->isTypeOperand()) { llvm::Constant *TypeInfo = Index: lib/CodeGen/CGExpr.cpp =================================================================== --- lib/CodeGen/CGExpr.cpp +++ lib/CodeGen/CGExpr.cpp @@ -62,16 +62,37 @@ /// block. Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, const Twine &Name) { - auto Alloca = CreateTempAlloca(Ty, Name); + auto CastedAlloca = CreateTempAlloca(Ty, Name); + auto *Alloca = getAddrSpaceCastedAlloca(CastedAlloca); Alloca->setAlignment(Align.getQuantity()); - return Address(Alloca, Align); + return Address(CastedAlloca, Align); } /// CreateTempAlloca - This creates a alloca and inserts it into the entry /// block. 
-llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, - const Twine &Name) { - return new llvm::AllocaInst(Ty, nullptr, Name, AllocaInsertPt); +llvm::Instruction *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, + const Twine &Name) { + return CreateAlloca(Ty, Name, AllocaInsertPt); +} + +llvm::Instruction *CodeGenFunction::CreateAlloca(llvm::Type *Ty, + const Twine &Name, + llvm::Instruction *InsertPos) { + llvm::Instruction *V = new llvm::AllocaInst(Ty, nullptr, Name, InsertPos); + auto DefaultAddr = getTarget().getDefaultTargetAddressSpace(getLangOpts()); + if (DefaultAddr != 0) { + auto *DestTy = llvm::PointerType::get(V->getType()->getPointerElementType(), + DefaultAddr); + V = new llvm::AddrSpaceCastInst(V, DestTy, "", InsertPos); + } + return V; +} + +llvm::AllocaInst * +CodeGenFunction::getAddrSpaceCastedAlloca(llvm::Instruction *V) const { + if (auto *Cast = dyn_cast<llvm::AddrSpaceCastInst>(V)) + return cast<llvm::AllocaInst>(Cast->getOperand(0)); + return cast<llvm::AllocaInst>(V); } /// CreateDefaultAlignTempAlloca - This creates an alloca with the @@ -416,8 +437,8 @@ // Create and initialize the reference temporary. Address Object = createReferenceTemporary(*this, M, E); if (auto *Var = dyn_cast<llvm::GlobalVariable>(Object.getPointer())) { - Object = Address(llvm::ConstantExpr::getBitCast( - Var, ConvertTypeForMem(E->getType())->getPointerTo()), + Object = Address(llvm::ConstantExpr::getPointerCast( + Var, getTypes().getPointerTypeTo(E->getType())), Object.getAlignment()); // If the temporary is a global and has a constant initializer or is a // constant temporary that we promoted to a global, we may have already @@ -2887,7 +2908,9 @@ } QualType EltType = E->getType()->castAsArrayTypeUnsafe()->getElementType(); - return Builder.CreateElementBitCast(Addr, ConvertTypeForMem(EltType)); + return Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, + ConvertTypeForMem(EltType)->getPointerTo(getContext(). 
+ getTargetAddressSpace(E->getType()))); } /// isSimpleArrayDecayOperand - If the specified expr is a simple decay from an Index: lib/CodeGen/CGException.cpp =================================================================== --- lib/CodeGen/CGException.cpp +++ lib/CodeGen/CGException.cpp @@ -237,7 +237,7 @@ static llvm::Constant *getOpaquePersonalityFn(CodeGenModule &CGM, const EHPersonality &Personality) { llvm::Constant *Fn = getPersonalityFn(CGM, Personality); - return llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); + return llvm::ConstantExpr::getPointerCast(Fn, CGM.Int8PtrTy); } /// Check whether a landingpad instruction only uses C++ features. @@ -1520,7 +1520,7 @@ llvm::Function *FrameRecoverFn = llvm::Intrinsic::getDeclaration( &CGM.getModule(), llvm::Intrinsic::localrecover); llvm::Constant *ParentI8Fn = - llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy); + llvm::ConstantExpr::getPointerCast(ParentCGF.CurFn, Int8PtrTy); RecoverCall = Builder.CreateCall( FrameRecoverFn, {ParentI8Fn, ParentFP, llvm::ConstantInt::get(Int32Ty, FrameEscapeIdx)}); @@ -1585,7 +1585,7 @@ llvm::Function *RecoverFPIntrin = CGM.getIntrinsic(llvm::Intrinsic::x86_seh_recoverfp); llvm::Constant *ParentI8Fn = - llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy); + llvm::ConstantExpr::getPointerCast(ParentCGF.CurFn, Int8PtrTy); ParentFP = Builder.CreateCall(RecoverFPIntrin, {ParentI8Fn, EntryFP}); } @@ -1812,7 +1812,7 @@ llvm::Function *FilterFunc = HelperCGF.GenerateSEHFilterFunction(*this, *Except); llvm::Constant *OpaqueFunc = - llvm::ConstantExpr::getBitCast(FilterFunc, Int8PtrTy); + llvm::ConstantExpr::getPointerCast(FilterFunc, Int8PtrTy); CatchScope->setHandler(0, OpaqueFunc, createBasicBlock("__except.ret")); } Index: lib/CodeGen/CGDeclCXX.cpp =================================================================== --- lib/CodeGen/CGDeclCXX.cpp +++ lib/CodeGen/CGDeclCXX.cpp @@ -103,8 +103,8 @@ CXXDestructorDecl *dtor = Record->getDestructor(); function = 
CGM.getAddrOfCXXStructor(dtor, StructorType::Complete); - argument = llvm::ConstantExpr::getBitCast( - addr.getPointer(), CGF.getTypes().ConvertType(type)->getPointerTo()); + argument = llvm::ConstantExpr::getPointerCast( + addr.getPointer(), CGF.getTypes().getPointerTypeTo(type)); // Otherwise, the standard logic requires a helper function. } else { @@ -135,7 +135,7 @@ CharUnits WidthChars = CGF.getContext().getTypeSizeInChars(D.getType()); uint64_t Width = WidthChars.getQuantity(); llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(CGF.Int64Ty, Width), - llvm::ConstantExpr::getBitCast(Addr, CGF.Int8PtrTy)}; + llvm::ConstantExpr::getPointerCast(Addr, CGF.Int8PtrTy)}; CGF.Builder.CreateCall(InvariantStart, Args); } Index: lib/CodeGen/CGDecl.cpp =================================================================== --- lib/CodeGen/CGDecl.cpp +++ lib/CodeGen/CGDecl.cpp @@ -1075,7 +1075,15 @@ llvm::AllocaInst *vla = Builder.CreateAlloca(llvmTy, elementCount, "vla"); vla->setAlignment(alignment.getQuantity()); - address = Address(vla, alignment); + llvm::Value *V = vla; + auto DefaultAddr = getTarget().getDefaultTargetAddressSpace(getLangOpts()); + if (DefaultAddr != 0) { + auto *DestTy = + llvm::PointerType::get(vla->getType()->getElementType(), DefaultAddr); + V = Builder.CreateAddrSpaceCast(vla, DestTy); + } + + address = Address(V, alignment); } setAddrOfLocalVar(&D, address); @@ -1244,7 +1252,7 @@ // Otherwise, create a temporary global with the initializer then // memcpy from the global to the alloca. 
std::string Name = getStaticDeclName(CGM, D); - unsigned AS = 0; + unsigned AS = CGM.getContext().getTargetConstantAddressSpace(); if (getLangOpts().OpenCL) { AS = CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant); BP = llvm::PointerType::getInt8PtrTy(getLLVMContext(), AS); Index: lib/CodeGen/CGClass.cpp =================================================================== --- lib/CodeGen/CGClass.cpp +++ lib/CodeGen/CGClass.cpp @@ -2372,12 +2372,16 @@ // Finally, store the address point. Use the same LLVM types as the field to // support optimization. + auto DefAddr = CGM.getTarget().getDefaultTargetAddressSpace( + CGM.getLangOpts()); llvm::Type *VTablePtrTy = llvm::FunctionType::get(CGM.Int32Ty, /*isVarArg=*/true) - ->getPointerTo() - ->getPointerTo(); - VTableField = Builder.CreateBitCast(VTableField, VTablePtrTy->getPointerTo()); - VTableAddressPoint = Builder.CreateBitCast(VTableAddressPoint, VTablePtrTy); + ->getPointerTo(DefAddr) + ->getPointerTo(DefAddr); + VTableField = Builder.CreatePointerBitCastOrAddrSpaceCast(VTableField, + VTablePtrTy->getPointerTo(DefAddr)); + VTableAddressPoint = Builder.CreatePointerBitCastOrAddrSpaceCast( + VTableAddressPoint, VTablePtrTy); llvm::StoreInst *Store = Builder.CreateStore(VTableAddressPoint, VTableField); CGM.DecorateInstructionWithTBAA(Store, CGM.getTBAAInfoForVTablePtr()); Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -3643,18 +3643,19 @@ if (llvm::StructType *ArgStruct = CallInfo.getArgStruct()) { ArgMemoryLayout = CGM.getDataLayout().getStructLayout(ArgStruct); llvm::Instruction *IP = CallArgs.getStackBase(); - llvm::AllocaInst *AI; + llvm::Instruction *CastedAI; if (IP) { IP = IP->getNextNode(); - AI = new llvm::AllocaInst(ArgStruct, "argmem", IP); + CastedAI = CreateAlloca(ArgStruct, "argmem", IP); } else { - AI = CreateTempAlloca(ArgStruct, "argmem"); + CastedAI = CreateTempAlloca(ArgStruct, 
"argmem"); } auto Align = CallInfo.getArgStructAlignment(); + auto *AI = getAddrSpaceCastedAlloca(CastedAI); AI->setAlignment(Align.getQuantity()); AI->setUsedWithInAlloca(true); assert(AI->isUsedWithInAlloca() && !AI->isStaticAlloca()); - ArgMemory = Address(AI, Align); + ArgMemory = Address(CastedAI, Align); } // Helper function to drill into the inalloca allocation. Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -2312,7 +2312,7 @@ case Builtin::BI__GetExceptionInfo: { if (llvm::GlobalVariable *GV = CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) - return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); + return RValue::get(llvm::ConstantExpr::getPointerCast(GV, CGM.Int8PtrTy)); break; } @@ -2674,7 +2674,9 @@ Arg)); } case Builtin::BIprintf: - if (getTarget().getTriple().isNVPTX()) + if (getTarget().getTriple().isNVPTX() || + (getTarget().getTriple().getArch() == Triple::amdgcn && + getLangOpts().CUDA)) return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue); break; case Builtin::BI__builtin_canonicalize: Index: lib/Basic/Targets.cpp =================================================================== --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -1997,16 +1997,6 @@ return llvm::makeArrayRef(GCCRegNames); } -static const unsigned AMDGPUAddrSpaceMap[] = { - 1, // opencl_global - 3, // opencl_local - 2, // opencl_constant - 4, // opencl_generic - 1, // cuda_device - 2, // cuda_constant - 3 // cuda_shared -}; - // If you edit the description strings, make sure you update // getPointerWidthV(). 
@@ -2020,9 +2010,18 @@ "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; class AMDGPUTargetInfo final : public TargetInfo { + static const unsigned AddrSpaceMap_[7]; static const Builtin::Info BuiltinInfo[]; static const char * const GCCRegNames[]; + enum AddrSpaceKind { + AS_Private = 0, + AS_Global = 1, + AS_Constant = 2, + AS_Local = 3, + AS_Generic = 4 + }; + /// \brief The GPU profiles supported by the AMDGPU target. enum GPUKind { GK_NONE, @@ -2066,7 +2065,7 @@ resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ? DataLayoutStringSI : DataLayoutStringR600); - AddrSpaceMap = &AMDGPUAddrSpaceMap; + AddrSpaceMap = &AddrSpaceMap_; UseAddrSpaceMapMangling = true; } @@ -2254,6 +2253,23 @@ } } + unsigned + getDefaultTargetAddressSpace(const LangOptions &Opts) const override { + // OpenCL sets address space explicitly in AST. The default case (type + // qualifier containing no address space) represents private address space. + if (Opts.OpenCL) + return AS_Private; + return AS_Generic; + } + + unsigned getConstantAddressSpace() const override { + return AS_Constant; + } + + unsigned getGlobalAddressSpace() const override { + return AS_Global; + } + LangAS::ID getOpenCLImageAddrSpace() const override { return LangAS::opencl_constant; } @@ -2268,14 +2284,23 @@ } } - // In amdgcn target the null pointer in global, constant, and generic - // address space has value 0 but in private and local address space has - // value ~0. + // In amdgcn target the null pointer in local and private address spaces has + // value ~0 and in other address spaces has value 0. uint64_t getNullPointerValue(unsigned AS) const override { - return AS != LangAS::opencl_local && AS != 0 ? 0 : ~0; + return AS != AS_Local && AS != 0 ? 
0 : ~0; } }; +const unsigned AMDGPUTargetInfo::AddrSpaceMap_[] = { + AS_Global, // opencl_global + AS_Local, // opencl_local + AS_Constant, // opencl_constant + AS_Generic, // opencl_generic + AS_Global, // cuda_device + AS_Constant, // cuda_constant + AS_Local // cuda_shared +}; + const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = { #define BUILTIN(ID, TYPE, ATTRS) \ { #ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr }, Index: lib/Basic/TargetInfo.cpp =================================================================== --- lib/Basic/TargetInfo.cpp +++ lib/Basic/TargetInfo.cpp @@ -330,6 +330,13 @@ if (Opts.NewAlignOverride) NewAlign = Opts.NewAlignOverride * getCharWidth(); + + if (getTriple().getArch() == llvm::Triple::amdgcn) { + auto DefAddr = getDefaultTargetAddressSpace(Opts); + // AMDGPUTargetInfo only implements getPointerWidthV and assumes + // pointers are self-aligned. + PointerWidth = PointerAlign = getPointerWidthV(DefAddr); + } } bool TargetInfo::initFeatureMap( Index: lib/AST/ASTContext.cpp =================================================================== --- lib/AST/ASTContext.cpp +++ lib/AST/ASTContext.cpp @@ -9531,13 +9531,40 @@ uint64_t ASTContext::getTargetNullPointerValue(QualType QT) const { unsigned AS; if (QT->getUnqualifiedDesugaredType()->isNullPtrType()) - AS = 0; + AS = getTargetInfo().getDefaultTargetAddressSpace(LangOpts); else - AS = QT->getPointeeType().getAddressSpace(); + AS = getTargetAddressSpace(QT->getPointeeType()); return getTargetInfo().getNullPointerValue(AS); } +unsigned ASTContext::getTargetDefaultAddressSpace() const { + return getTargetInfo().getDefaultTargetAddressSpace(LangOpts); +} + +unsigned ASTContext::getTargetConstantAddressSpace() const { + return getTargetInfo().getConstantAddressSpace(); +} + +unsigned ASTContext::getTargetGlobalAddressSpace() const { + return getTargetInfo().getGlobalAddressSpace(); +} + +unsigned ASTContext::getTargetAddressSpace(QualType T) const { + if (T.isNull()) + return 
getTargetDefaultAddressSpace(); + if (T->isFunctionType() && + !T.getQualifiers().hasAddressSpace()) + return 0; + return getTargetAddressSpace(T.getQualifiers()); +} + +unsigned ASTContext::getTargetAddressSpace(Qualifiers Q) const { + return Q.hasAddressSpace() + ? getTargetAddressSpace(Q.getAddressSpace()) + : getTargetDefaultAddressSpace(); +} + // Explicitly instantiate this in case a Redeclarable<T> is used from a TU that // doesn't include ASTContext.h template Index: include/clang/Basic/TargetInfo.h =================================================================== --- include/clang/Basic/TargetInfo.h +++ include/clang/Basic/TargetInfo.h @@ -302,11 +302,23 @@ } /// \brief Get integer value for null pointer. - /// \param AddrSpace address space of pointee in source language. + /// \param AddrSpace target address space of pointee. virtual uint64_t getNullPointerValue(unsigned AddrSpace) const { return 0; } + /// The target address space corresponding to OpenCL constant address space + /// CUDA constant specifier. + virtual unsigned getConstantAddressSpace() const { + return 0; + } + + /// The target address space corresponding to OpenCL global address space + /// or CUDA device specifier. + virtual unsigned getGlobalAddressSpace() const { + return 0; + } + /// \brief Return the size of '_Bool' and C++ 'bool' for this target, in bits. unsigned getBoolWidth() const { return BoolWidth; } @@ -953,6 +965,10 @@ return *AddrSpaceMap; } + virtual unsigned getDefaultTargetAddressSpace(const LangOptions &Opt) const { + return 0; + } + /// \brief Retrieve the name of the platform as it is used in the /// availability attribute. 
StringRef getPlatformName() const { return PlatformName; } Index: include/clang/AST/ASTContext.h =================================================================== --- include/clang/AST/ASTContext.h +++ include/clang/AST/ASTContext.h @@ -2300,13 +2300,9 @@ QualType getFloatingTypeOfSizeWithinDomain(QualType typeSize, QualType typeDomain) const; - unsigned getTargetAddressSpace(QualType T) const { - return getTargetAddressSpace(T.getQualifiers()); - } + unsigned getTargetAddressSpace(QualType T) const; - unsigned getTargetAddressSpace(Qualifiers Q) const { - return getTargetAddressSpace(Q.getAddressSpace()); - } + unsigned getTargetAddressSpace(Qualifiers Q) const; unsigned getTargetAddressSpace(unsigned AS) const { if (AS < LangAS::Offset || AS >= LangAS::Offset + LangAS::Count) @@ -2319,6 +2315,16 @@ /// constant folding. uint64_t getTargetNullPointerValue(QualType QT) const; + unsigned getTargetDefaultAddressSpace() const; + + /// The target address space corresponding to OpenCL constant address space + /// CUDA constant specifier. + unsigned getTargetConstantAddressSpace() const; + + /// The target address space corresponding to OpenCL global address space + /// or CUDA device specifier. + unsigned getTargetGlobalAddressSpace() const; + bool addressSpaceMapManglingFor(unsigned AS) const { return AddrSpaceMapMangling || AS < LangAS::Offset ||
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits