ABataev created this revision. ABataev added a reviewer: jdoerfert. Herald added a subscriber: guansong. Herald added a project: clang.
Currently, we ignore all locality attributes/info when building for the device, and thus all symbols are externally visible and can be preempted at runtime. This may lead to incorrect results. We need to follow the same logic the compiler uses for static/PIE builds. But in some cases changing the DSO locality may lead to problems with codegen, so mark external symbols as hidden in the device code instead. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D70549 Files: clang/lib/AST/Decl.cpp clang/lib/CodeGen/CodeGenModule.cpp clang/test/OpenMP/declare_target_codegen.cpp clang/test/OpenMP/nvptx_allocate_codegen.cpp clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp clang/test/OpenMP/nvptx_target_codegen.cpp clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp
Index: clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp +++ clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp @@ -34,18 +34,18 @@ #pragma omp declare target T a = T(); T f = a; -// CHECK: define{{ dso_local | }}void @{{.+}}foo{{.+}}([[T]]* byval([[T]]) align {{.+}}) +// CHECK: define{{ hidden | }}void @{{.+}}foo{{.+}}([[T]]* byval([[T]]) align {{.+}}) void foo(T a = T()) { return; } -// CHECK: define{{ dso_local | }}[6 x i64] @{{.+}}bar{{.+}}() +// CHECK: define{{ hidden | }}[6 x i64] @{{.+}}bar{{.+}}() T bar() { // CHECK: bitcast [[T]]* %{{.+}} to [6 x i64]* // CHECK-NEXT: load [6 x i64], [6 x i64]* %{{.+}}, // CHECK-NEXT: ret [6 x i64] return T(); } -// CHECK: define{{ dso_local | }}void @{{.+}}baz{{.+}}() +// CHECK: define{{ hidden | }}void @{{.+}}baz{{.+}}() void baz() { // CHECK: call [6 x i64] @{{.+}}bar{{.+}}() // CHECK-NEXT: bitcast [[T]]* %{{.+}} to [6 x i64]* @@ -54,17 +54,17 @@ } T1 a1 = T1(); T1 f1 = a1; -// CHECK: define{{ dso_local | }}void @{{.+}}foo1{{.+}}([[T1]]* byval([[T1]]) align {{.+}}) +// CHECK: define{{ hidden | }}void @{{.+}}foo1{{.+}}([[T1]]* byval([[T1]]) align {{.+}}) void foo1(T1 a = T1()) { return; } -// CHECK: define{{ dso_local | }}[[T1]] @{{.+}}bar1{{.+}}() +// CHECK: define{{ hidden | }}[[T1]] @{{.+}}bar1{{.+}}() T1 bar1() { // CHECK: load [[T1]], [[T1]]* // CHECK-NEXT: ret [[T1]] return T1(); } -// CHECK: define{{ dso_local | }}void @{{.+}}baz1{{.+}}() +// CHECK: define{{ hidden | }}void @{{.+}}baz1{{.+}}() void baz1() { // CHECK: call [[T1]] @{{.+}}bar1{{.+}}() T1 t = bar1(); Index: clang/test/OpenMP/nvptx_target_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_codegen.cpp +++ clang/test/OpenMP/nvptx_target_codegen.cpp @@ -573,7 +573,7 @@ // CHECK: [[EXIT]] // CHECK: ret void - // CHECK: define{{ dso_local | }}i32 [[BAZ]](i32 
[[F:%.*]], double* dereferenceable{{.*}}) + // CHECK: define{{ hidden | }}i32 [[BAZ]](i32 [[F:%.*]], double* dereferenceable{{.*}}) // CHECK: alloca i32, // CHECK: [[LOCAL_F_PTR:%.+]] = alloca i32, // CHECK: [[ZERO_ADDR:%.+]] = alloca i32, Index: clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp +++ clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp @@ -16,9 +16,9 @@ // SIMD-ONLY-NOT: {{__kmpc|__tgt}} // DEVICE-DAG: [[C_ADDR:.+]] = internal global i32 0, -// DEVICE-DAG: [[CD_ADDR:@.+]] ={{ dso_local | }}global %struct.S zeroinitializer, +// DEVICE-DAG: [[CD_ADDR:@.+]] ={{ hidden | }}global %struct.S zeroinitializer, // HOST-DAG: @[[C_ADDR:.+]] = internal global i32 0, -// HOST-DAG: @[[CD_ADDR:.+]] ={{ dso_local | }}global %struct.S zeroinitializer, +// HOST-DAG: @[[CD_ADDR:.+]] ={{ hidden | }}global %struct.S zeroinitializer, #pragma omp declare target int foo() { return 0; } @@ -34,12 +34,12 @@ #pragma omp declare target (bar) int caz() { return 0; } -// DEVICE-DAG: define{{ dso_local | }}i32 [[FOO:@.*foo.*]]() -// DEVICE-DAG: define{{ dso_local | }}i32 [[BAR:@.*bar.*]]() -// DEVICE-DAG: define{{ dso_local | }}i32 [[BAZ:@.*baz.*]]() -// DEVICE-DAG: define{{ dso_local | }}i32 [[DOO:@.*doo.*]]() -// DEVICE-DAG: define{{ dso_local | }}i32 [[CAR:@.*car.*]]() -// DEVICE-DAG: define{{ dso_local | }}i32 [[CAZ:@.*caz.*]]() +// DEVICE-DAG: define{{ hidden | }}i32 [[FOO:@.*foo.*]]() +// DEVICE-DAG: define{{ hidden | }}i32 [[BAR:@.*bar.*]]() +// DEVICE-DAG: define{{ hidden | }}i32 [[BAZ:@.*baz.*]]() +// DEVICE-DAG: define{{ hidden | }}i32 [[DOO:@.*doo.*]]() +// DEVICE-DAG: define{{ hidden | }}i32 [[CAR:@.*car.*]]() +// DEVICE-DAG: define{{ hidden | }}i32 [[CAZ:@.*caz.*]]() static int c = foo() + bar() + baz(); #pragma omp declare target (c) Index: clang/test/OpenMP/nvptx_allocate_codegen.cpp 
=================================================================== --- clang/test/OpenMP/nvptx_allocate_codegen.cpp +++ clang/test/OpenMP/nvptx_allocate_codegen.cpp @@ -17,11 +17,11 @@ extern const omp_allocator_handle_t omp_thread_mem_alloc; // CHECK-DAG: @{{.+}}St1{{.+}}b{{.+}} = external global i32, -// CHECK-DAG: @a ={{ dso_local | }}global i32 0, -// CHECK-DAG: @b ={{ dso_local | }}addrspace(4) global i32 0, -// CHECK-DAG: @c ={{ dso_local | }}global i32 0, -// CHECK-DAG: @d ={{ dso_local | }}global %struct.St1 zeroinitializer, -// CHECK-DAG: @{{.+}}ns{{.+}}a{{.+}} ={{ dso_local | }}addrspace(3) global i32 0, +// CHECK-DAG: @a ={{ hidden | }}global i32 0, +// CHECK-DAG: @b ={{ hidden | }}addrspace(4) global i32 0, +// CHECK-DAG: @c ={{ hidden | }}global i32 0, +// CHECK-DAG: @d ={{ hidden | }}global %struct.St1 zeroinitializer, +// CHECK-DAG: @{{.+}}ns{{.+}}a{{.+}} ={{ hidden | }}addrspace(3) global i32 0, // CHECK-DAG: @{{.+}}main{{.+}}a{{.*}} = internal global i32 0, // CHECK-DAG: @{{.+}}ST{{.+}}m{{.+}} = external global i32, // CHECK-DAG: @bar_c = internal global i32 0, @@ -79,7 +79,7 @@ void baz(float &); -// CHECK: define{{ dso_local | }}void @{{.+}}bar{{.+}}() +// CHECK: define{{ hidden | }}void @{{.+}}bar{{.+}}() void bar() { // CHECK: alloca float, float bar_a; Index: clang/test/OpenMP/declare_target_codegen.cpp =================================================================== --- clang/test/OpenMP/declare_target_codegen.cpp +++ clang/test/OpenMP/declare_target_codegen.cpp @@ -27,22 +27,22 @@ // CHECK-DAG: Bake // CHECK-NOT: @{{hhh|ggg|fff|eee}} = // CHECK-DAG: @aaa = external global i32, -// CHECK-DAG: @bbb ={{ dso_local | }}global i32 0, +// CHECK-DAG: @bbb ={{ hidden | }}global i32 0, // CHECK-DAG: weak constant %struct.__tgt_offload_entry { i8* bitcast (i32* @bbb to i8*), // CHECK-DAG: @ccc = external global i32, -// CHECK-DAG: @ddd ={{ dso_local | }}global i32 0, +// CHECK-DAG: @ddd ={{ hidden | }}global i32 0, // CHECK-DAG: 
@hhh_decl_tgt_ref_ptr = weak global i32* null // CHECK-DAG: @ggg_decl_tgt_ref_ptr = weak global i32* null // CHECK-DAG: @fff_decl_tgt_ref_ptr = weak global i32* null // CHECK-DAG: @eee_decl_tgt_ref_ptr = weak global i32* null // CHECK-DAG: @{{.*}}maini1{{.*}}aaa = internal global i64 23, -// CHECK-DAG: @b ={{ dso_local | }}global i32 15, -// CHECK-DAG: @d ={{ dso_local | }}global i32 0, +// CHECK-DAG: @b ={{ hidden | }}global i32 15, +// CHECK-DAG: @d ={{ hidden | }}global i32 0, // CHECK-DAG: @c = external global i32, -// CHECK-DAG: @globals ={{ dso_local | }}global %struct.S zeroinitializer, +// CHECK-DAG: @globals ={{ hidden | }}global %struct.S zeroinitializer, // CHECK-DAG: [[STAT:@.+stat]] = internal global %struct.S zeroinitializer, // CHECK-DAG: [[STAT_REF:@.+]] = internal constant %struct.S* [[STAT]] -// CHECK-DAG: @out_decl_target ={{ dso_local | }}global i32 0, +// CHECK-DAG: @out_decl_target ={{ hidden | }}global i32 0, // CHECK-DAG: @llvm.used = appending global [2 x i8*] [i8* bitcast (void ()* @__omp_offloading__{{.+}}_globals_l[[@LINE+84]]_ctor to i8*), i8* bitcast (void ()* @__omp_offloading__{{.+}}_stat_l[[@LINE+85]]_ctor to i8*)], // CHECK-DAG: @llvm.compiler.used = appending global [1 x i8*] [i8* bitcast (%struct.S** [[STAT_REF]] to i8*)], Index: clang/lib/CodeGen/CodeGenModule.cpp =================================================================== --- clang/lib/CodeGen/CodeGenModule.cpp +++ clang/lib/CodeGen/CodeGenModule.cpp @@ -817,7 +817,7 @@ const auto &CGOpts = CGM.getCodeGenOpts(); llvm::Reloc::Model RM = CGOpts.RelocationModel; const auto &LOpts = CGM.getLangOpts(); - if (RM != llvm::Reloc::Static && !LOpts.PIE && !LOpts.OpenMPIsDevice) + if (RM != llvm::Reloc::Static && !LOpts.PIE) return false; // A definition cannot be preempted from an executable. 
Index: clang/lib/AST/Decl.cpp =================================================================== --- clang/lib/AST/Decl.cpp +++ clang/lib/AST/Decl.cpp @@ -901,6 +901,10 @@ if (!isExternallyVisible(LV.getLinkage())) return LinkageInfo(LV.getLinkage(), DefaultVisibility, false); + // Mark the symbols as hidden when compiling for the device. + if (Context.getLangOpts().OpenMP && Context.getLangOpts().OpenMPIsDevice) + LV.mergeVisibility(HiddenVisibility, /*newExplicit=*/false); + return LV; }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits