jdoerfert created this revision.
jdoerfert added reviewers: ABataev, arpith-jacob, guraypp, gtbercea, hfinkel.
jdoerfert added projects: OpenMP, clang.
jdoerfert added a parent revision: D59418: [OpenMP][Offloading] Extract common
functionality.
This commit simply moves the runtime function generation from the
NVPTX-specific code generation to the common target code generation.
This is a preparation patch for https://reviews.llvm.org/D59328
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D59420
Files:
clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
Index: clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeTarget.h
@@ -38,6 +38,95 @@
/// Return the value decleration encapsulated in the expression \p E.
static const ValueDecl *getUnderlyingVar(const Expr *E);
+ enum OpenMPRTLTargetFunctions {
+ /// Call to void __kmpc_kernel_init(kmp_int32 thread_limit,
+ /// int16_t RequiresOMPRuntime);
+ OMPRTL_NVPTX__kmpc_kernel_init,
+ /// Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
+ OMPRTL_NVPTX__kmpc_kernel_deinit,
+ /// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
+ /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
+ OMPRTL_NVPTX__kmpc_spmd_kernel_init,
+ /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
+ OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
+ /// Call to void __kmpc_kernel_prepare_parallel(void
+ /// *outlined_function, int16_t
+ /// IsOMPRuntimeInitialized);
+ OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
+ /// Call to bool __kmpc_kernel_parallel(void **outlined_function,
+ /// int16_t IsOMPRuntimeInitialized);
+ OMPRTL_NVPTX__kmpc_kernel_parallel,
+ /// Call to void __kmpc_kernel_end_parallel();
+ OMPRTL_NVPTX__kmpc_kernel_end_parallel,
+ /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
+ /// global_tid);
+ OMPRTL_NVPTX__kmpc_serialized_parallel,
+ /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
+ /// global_tid);
+ OMPRTL_NVPTX__kmpc_end_serialized_parallel,
+ /// Call to int32_t __kmpc_shuffle_int32(int32_t element,
+ /// int16_t lane_offset, int16_t warp_size);
+ OMPRTL_NVPTX__kmpc_shuffle_int32,
+ /// Call to int64_t __kmpc_shuffle_int64(int64_t element,
+ /// int16_t lane_offset, int16_t warp_size);
+ OMPRTL_NVPTX__kmpc_shuffle_int64,
+ /// Call to __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, kmp_int32
+ /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
+ /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+ /// lane_offset, int16_t shortCircuit),
+ /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
+ OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2,
+ /// Call to __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32
+ /// global_tid, void *global_buffer, int32_t num_of_records, void*
+ /// reduce_data,
+ /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+ /// lane_offset, int16_t shortCircuit),
+ /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void
+ /// (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data),
+ /// void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx,
+ /// void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer,
+ /// int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void
+ /// *buffer, int idx, void *reduce_data));
+ OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2,
+ /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
+ OMPRTL_NVPTX__kmpc_end_reduce_nowait,
+ /// Call to void __kmpc_data_sharing_init_stack();
+ OMPRTL_NVPTX__kmpc_data_sharing_init_stack,
+ /// Call to void __kmpc_data_sharing_init_stack_spmd();
+ OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd,
+ /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size,
+ /// int16_t UseSharedMemory);
+ OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack,
+ /// Call to void __kmpc_data_sharing_pop_stack(void *a);
+ OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
+ /// Call to void __kmpc_begin_sharing_variables(void ***args,
+ /// size_t n_args);
+ OMPRTL_NVPTX__kmpc_begin_sharing_variables,
+ /// Call to void __kmpc_end_sharing_variables();
+ OMPRTL_NVPTX__kmpc_end_sharing_variables,
+ /// Call to void __kmpc_get_shared_variables(void ***GlobalArgs)
+ OMPRTL_NVPTX__kmpc_get_shared_variables,
+ /// Call to uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32
+ /// global_tid);
+ OMPRTL_NVPTX__kmpc_parallel_level,
+ /// Call to int8_t __kmpc_is_spmd_exec_mode();
+ OMPRTL_NVPTX__kmpc_is_spmd_exec_mode,
+ /// Call to void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
+ /// const void *buf, size_t size, int16_t is_shared, const void **res);
+ OMPRTL_NVPTX__kmpc_get_team_static_memory,
+ /// Call to void __kmpc_restore_team_static_memory(int16_t
+ /// isSPMDExecutionMode, int16_t is_shared);
+ OMPRTL_NVPTX__kmpc_restore_team_static_memory,
+ /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
+ OMPRTL__kmpc_barrier,
+ /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
+ /// global_tid);
+ OMPRTL__kmpc_barrier_simple_spmd,
+ };
+
+ /// Returns the OpenMP runtime function identified by \p ID.
+ llvm::FunctionCallee createTargetRuntimeFunction(OpenMPRTLTargetFunctions ID);
+
//
// Base class overrides.
//
Index: clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeTarget.cpp
@@ -49,6 +49,304 @@
return cast<ValueDecl>(ME->getMemberDecl()->getCanonicalDecl());
}
+llvm::FunctionCallee CGOpenMPRuntimeTarget::createTargetRuntimeFunction(
+ OpenMPRTLTargetFunctions ID) {
+ llvm::FunctionCallee RTLFn = nullptr;
+ switch (ID) {
+ case OMPRTL_NVPTX__kmpc_kernel_init: {
+ // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t
+ // RequiresOMPRuntime);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_kernel_deinit: {
+ // Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
+ llvm::Type *TypeParams[] = {CGM.Int16Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_spmd_kernel_init: {
+ // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
+ // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: {
+ // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
+ llvm::Type *TypeParams[] = {CGM.Int16Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
+ /// Build void __kmpc_kernel_prepare_parallel(
+ /// void *outlined_function, int16_t IsOMPRuntimeInitialized);
+ llvm::Type *TypeParams[] = {CGM.Int8PtrTy, CGM.Int16Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_kernel_parallel: {
+ /// Build bool __kmpc_kernel_parallel(void **outlined_function,
+ /// int16_t IsOMPRuntimeInitialized);
+ llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, CGM.Int16Ty};
+ llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);
+ auto *FnTy =
+ llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_kernel_end_parallel: {
+ /// Build void __kmpc_kernel_end_parallel();
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_serialized_parallel: {
+ // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
+ // global_tid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_end_serialized_parallel: {
+ // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
+ // global_tid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_shuffle_int32: {
+ // Build int32_t __kmpc_shuffle_int32(int32_t element,
+ // int16_t lane_offset, int16_t warp_size);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_shuffle_int64: {
+ // Build int64_t __kmpc_shuffle_int64(int64_t element,
+ // int16_t lane_offset, int16_t warp_size);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2: {
+ // Build int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc,
+ // kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void*
+ // reduce_data, void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t
+ // lane_id, int16_t lane_offset, int16_t shortCircuit), void
+ // (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
+ llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
+ CGM.Int16Ty, CGM.Int16Ty};
+ auto *ShuffleReduceFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
+ auto *InterWarpCopyFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
+ CGM.Int32Ty,
+ CGM.Int32Ty,
+ CGM.SizeTy,
+ CGM.VoidPtrTy,
+ ShuffleReduceFnTy->getPointerTo(),
+ InterWarpCopyFnTy->getPointerTo()};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait_v2");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
+ // Build void __kmpc_nvptx_end_reduce_nowait(kmp_int32 global_tid);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: {
+ // Build int32_t __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32
+ // global_tid, void *global_buffer, int32_t num_of_records, void*
+ // reduce_data,
+ // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
+ // lane_offset, int16_t shortCircuit),
+ // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void
+ // (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data),
+ // void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx,
+ // void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer,
+ // int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void
+ // *buffer, int idx, void *reduce_data));
+ llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
+ CGM.Int16Ty, CGM.Int16Ty};
+ auto *ShuffleReduceFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
+ auto *InterWarpCopyFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *GlobalListTypeParams[] = {CGM.VoidPtrTy, CGM.IntTy,
+ CGM.VoidPtrTy};
+ auto *GlobalListFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, GlobalListTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
+ CGM.Int32Ty,
+ CGM.VoidPtrTy,
+ CGM.Int32Ty,
+ CGM.VoidPtrTy,
+ ShuffleReduceFnTy->getPointerTo(),
+ InterWarpCopyFnTy->getPointerTo(),
+ GlobalListFnTy->getPointerTo(),
+ GlobalListFnTy->getPointerTo(),
+ GlobalListFnTy->getPointerTo(),
+ GlobalListFnTy->getPointerTo()};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_v2");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: {
+ /// Build void __kmpc_data_sharing_init_stack();
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: {
+ /// Build void __kmpc_data_sharing_init_stack_spmd();
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: {
+ // Build void *__kmpc_data_sharing_coalesced_push_stack(size_t size,
+ // int16_t UseSharedMemory);
+ llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
+ // Build void __kmpc_data_sharing_pop_stack(void *a);
+ llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy,
+ /*Name=*/"__kmpc_data_sharing_pop_stack");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_begin_sharing_variables: {
+ /// Build void __kmpc_begin_sharing_variables(void ***args,
+ /// size_t n_args);
+ llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_begin_sharing_variables");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_end_sharing_variables: {
+ /// Build void __kmpc_end_sharing_variables();
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_sharing_variables");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_get_shared_variables: {
+ /// Build void __kmpc_get_shared_variables(void ***GlobalArgs);
+ llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo()};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_shared_variables");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_parallel_level: {
+ // Build uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 global_tid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.Int16Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_parallel_level");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: {
+ // Build int8_t __kmpc_is_spmd_exec_mode();
+ auto *FnTy = llvm::FunctionType::get(CGM.Int8Ty, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_get_team_static_memory: {
+ // Build void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
+ // const void *buf, size_t size, int16_t is_shared, const void **res);
+ llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.VoidPtrTy, CGM.SizeTy,
+ CGM.Int16Ty, CGM.VoidPtrPtrTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory");
+ break;
+ }
+ case OMPRTL_NVPTX__kmpc_restore_team_static_memory: {
+ // Build void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
+ // int16_t is_shared);
+ llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.Int16Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory");
+ break;
+ }
+ case OMPRTL__kmpc_barrier: {
+ // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
+ cast<llvm::Function>(RTLFn.getCallee())
+ ->addFnAttr(llvm::Attribute::Convergent);
+ break;
+ }
+ case OMPRTL__kmpc_barrier_simple_spmd: {
+ // Build void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
+ // global_tid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier_simple_spmd");
+ cast<llvm::Function>(RTLFn.getCallee())
+ ->addFnAttr(llvm::Attribute::Convergent);
+ break;
+ }
+ }
+ return RTLFn;
+}
+
void CGOpenMPRuntimeTarget::createOffloadEntry(
llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t,
llvm::GlobalValue::LinkageTypes) {
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -277,12 +277,6 @@
ArrayRef<const Expr *> ReductionOps,
ReductionOptionsTy Options) override;
- /// Returns specified OpenMP runtime function for the current OpenMP
- /// implementation. Specialized for the NVPTX device.
- /// \param Function OpenMP runtime function.
- /// \return Specified function.
- llvm::FunctionCallee createNVPTXRuntimeFunction(unsigned Function);
-
/// Translates the native parameter of outlined function if this is required
/// for target.
/// \param FD Field decl from captured record for the parameter.
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -22,92 +22,9 @@
using namespace clang;
using namespace CodeGen;
+using OpenMPRTLFunctionNVPTX = CGOpenMPRuntimeTarget::OpenMPRTLTargetFunctions;
+
namespace {
-enum OpenMPRTLFunctionNVPTX {
- /// Call to void __kmpc_kernel_init(kmp_int32 thread_limit,
- /// int16_t RequiresOMPRuntime);
- OMPRTL_NVPTX__kmpc_kernel_init,
- /// Call to void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
- OMPRTL_NVPTX__kmpc_kernel_deinit,
- /// Call to void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
- /// int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
- OMPRTL_NVPTX__kmpc_spmd_kernel_init,
- /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
- OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2,
- /// Call to void __kmpc_kernel_prepare_parallel(void
- /// *outlined_function, int16_t
- /// IsOMPRuntimeInitialized);
- OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
- /// Call to bool __kmpc_kernel_parallel(void **outlined_function,
- /// int16_t IsOMPRuntimeInitialized);
- OMPRTL_NVPTX__kmpc_kernel_parallel,
- /// Call to void __kmpc_kernel_end_parallel();
- OMPRTL_NVPTX__kmpc_kernel_end_parallel,
- /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
- /// global_tid);
- OMPRTL_NVPTX__kmpc_serialized_parallel,
- /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
- /// global_tid);
- OMPRTL_NVPTX__kmpc_end_serialized_parallel,
- /// Call to int32_t __kmpc_shuffle_int32(int32_t element,
- /// int16_t lane_offset, int16_t warp_size);
- OMPRTL_NVPTX__kmpc_shuffle_int32,
- /// Call to int64_t __kmpc_shuffle_int64(int64_t element,
- /// int16_t lane_offset, int16_t warp_size);
- OMPRTL_NVPTX__kmpc_shuffle_int64,
- /// Call to __kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc, kmp_int32
- /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data,
- /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
- /// lane_offset, int16_t shortCircuit),
- /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num));
- OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2,
- /// Call to __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32
- /// global_tid, void *global_buffer, int32_t num_of_records, void*
- /// reduce_data,
- /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
- /// lane_offset, int16_t shortCircuit),
- /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void
- /// (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data),
- /// void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx,
- /// void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer,
- /// int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void
- /// *buffer, int idx, void *reduce_data));
- OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2,
- /// Call to __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
- OMPRTL_NVPTX__kmpc_end_reduce_nowait,
- /// Call to void __kmpc_data_sharing_init_stack();
- OMPRTL_NVPTX__kmpc_data_sharing_init_stack,
- /// Call to void __kmpc_data_sharing_init_stack_spmd();
- OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd,
- /// Call to void* __kmpc_data_sharing_coalesced_push_stack(size_t size,
- /// int16_t UseSharedMemory);
- OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack,
- /// Call to void __kmpc_data_sharing_pop_stack(void *a);
- OMPRTL_NVPTX__kmpc_data_sharing_pop_stack,
- /// Call to void __kmpc_begin_sharing_variables(void ***args,
- /// size_t n_args);
- OMPRTL_NVPTX__kmpc_begin_sharing_variables,
- /// Call to void __kmpc_end_sharing_variables();
- OMPRTL_NVPTX__kmpc_end_sharing_variables,
- /// Call to void __kmpc_get_shared_variables(void ***GlobalArgs)
- OMPRTL_NVPTX__kmpc_get_shared_variables,
- /// Call to uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32
- /// global_tid);
- OMPRTL_NVPTX__kmpc_parallel_level,
- /// Call to int8_t __kmpc_is_spmd_exec_mode();
- OMPRTL_NVPTX__kmpc_is_spmd_exec_mode,
- /// Call to void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
- /// const void *buf, size_t size, int16_t is_shared, const void **res);
- OMPRTL_NVPTX__kmpc_get_team_static_memory,
- /// Call to void __kmpc_restore_team_static_memory(int16_t
- /// isSPMDExecutionMode, int16_t is_shared);
- OMPRTL_NVPTX__kmpc_restore_team_static_memory,
- /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
- OMPRTL__kmpc_barrier,
- /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
- /// global_tid);
- OMPRTL__kmpc_barrier_simple_spmd,
-};
/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
class NVPTXActionTy final : public PrePostActionTy {
@@ -1245,11 +1162,11 @@
llvm::Value *Args[] = {getThreadLimit(CGF),
Bld.getInt16(/*RequiresOMPRuntime=*/1)};
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), Args);
// For data sharing, we need to initialize the stack.
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(
+ createTargetRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
emitGenericVarsProlog(CGF, WST.Loc);
@@ -1274,7 +1191,7 @@
// TODO: Optimize runtime initialization and pass in correct value.
llvm::Value *Args[] = {CGF.Builder.getInt16(/*IsOMPRuntimeInitialized=*/1)};
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args);
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_deinit), Args);
// Barrier to terminate worker threads.
syncCTAThreads(CGF);
// Master thread jumps to exit point.
@@ -1349,11 +1266,11 @@
Bld.getInt16(RequiresFullRuntime ? 1 : 0),
/*RequiresDataSharing=*/Bld.getInt16(0)};
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_spmd_kernel_init), Args);
if (RequiresFullRuntime) {
// For data sharing, we need to initialize the stack.
- CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ CGF.EmitRuntimeCall(createTargetRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd));
}
@@ -1381,7 +1298,7 @@
llvm::Value *Args[] = {/*RequiresOMPRuntime=*/
CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)};
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(
+ createTargetRuntimeFunction(
OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2), Args);
CGF.EmitBranch(EST.ExitBB);
@@ -1453,7 +1370,7 @@
llvm::Value *Args[] = {WorkFn.getPointer(),
/*RequiresOMPRuntime=*/Bld.getInt16(1)};
llvm::Value *Ret = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus);
// On termination condition (workid == 0), exit loop.
@@ -1519,7 +1436,7 @@
// Signal end of parallel region.
CGF.EmitBlock(TerminateBB);
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel),
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_end_parallel),
llvm::None);
CGF.EmitBranch(BarrierBB);
@@ -1535,308 +1452,6 @@
clearLocThreadIdInsertPt(CGF);
}
-/// Returns specified OpenMP runtime function for the current OpenMP
-/// implementation. Specialized for the NVPTX device.
-/// \param Function OpenMP runtime function.
-/// \return Specified function.
-llvm::FunctionCallee
-CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
- llvm::FunctionCallee RTLFn = nullptr;
- switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) {
- case OMPRTL_NVPTX__kmpc_kernel_init: {
- // Build void __kmpc_kernel_init(kmp_int32 thread_limit, int16_t
- // RequiresOMPRuntime);
- llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init");
- break;
- }
- case OMPRTL_NVPTX__kmpc_kernel_deinit: {
- // Build void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
- llvm::Type *TypeParams[] = {CGM.Int16Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_deinit");
- break;
- }
- case OMPRTL_NVPTX__kmpc_spmd_kernel_init: {
- // Build void __kmpc_spmd_kernel_init(kmp_int32 thread_limit,
- // int16_t RequiresOMPRuntime, int16_t RequiresDataSharing);
- llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_init");
- break;
- }
- case OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2: {
- // Build void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
- llvm::Type *TypeParams[] = {CGM.Int16Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_spmd_kernel_deinit_v2");
- break;
- }
- case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
- /// Build void __kmpc_kernel_prepare_parallel(
- /// void *outlined_function, int16_t IsOMPRuntimeInitialized);
- llvm::Type *TypeParams[] = {CGM.Int8PtrTy, CGM.Int16Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel");
- break;
- }
- case OMPRTL_NVPTX__kmpc_kernel_parallel: {
- /// Build bool __kmpc_kernel_parallel(void **outlined_function,
- /// int16_t IsOMPRuntimeInitialized);
- llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, CGM.Int16Ty};
- llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);
- auto *FnTy =
- llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_parallel");
- break;
- }
- case OMPRTL_NVPTX__kmpc_kernel_end_parallel: {
- /// Build void __kmpc_kernel_end_parallel();
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_end_parallel");
- break;
- }
- case OMPRTL_NVPTX__kmpc_serialized_parallel: {
- // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
- // global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
- break;
- }
- case OMPRTL_NVPTX__kmpc_end_serialized_parallel: {
- // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
- // global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
- break;
- }
- case OMPRTL_NVPTX__kmpc_shuffle_int32: {
- // Build int32_t __kmpc_shuffle_int32(int32_t element,
- // int16_t lane_offset, int16_t warp_size);
- llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int16Ty, CGM.Int16Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int32");
- break;
- }
- case OMPRTL_NVPTX__kmpc_shuffle_int64: {
- // Build int64_t __kmpc_shuffle_int64(int64_t element,
- // int16_t lane_offset, int16_t warp_size);
- llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int16Ty, CGM.Int16Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_shuffle_int64");
- break;
- }
- case OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2: {
- // Build int32_t kmpc_nvptx_parallel_reduce_nowait_v2(ident_t *loc,
- // kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void*
- // reduce_data, void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t
- // lane_id, int16_t lane_offset, int16_t Algorithm Version), void
- // (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num));
- llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
- CGM.Int16Ty, CGM.Int16Ty};
- auto *ShuffleReduceFnTy =
- llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
- /*isVarArg=*/false);
- llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
- auto *InterWarpCopyFnTy =
- llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
- /*isVarArg=*/false);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
- CGM.Int32Ty,
- CGM.Int32Ty,
- CGM.SizeTy,
- CGM.VoidPtrTy,
- ShuffleReduceFnTy->getPointerTo(),
- InterWarpCopyFnTy->getPointerTo()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait_v2");
- break;
- }
- case OMPRTL_NVPTX__kmpc_end_reduce_nowait: {
- // Build __kmpc_end_reduce_nowait(kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_nvptx_end_reduce_nowait");
- break;
- }
- case OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2: {
- // Build int32_t __kmpc_nvptx_teams_reduce_nowait_v2(ident_t *loc, kmp_int32
- // global_tid, void *global_buffer, int32_t num_of_records, void*
- // reduce_data,
- // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t
- // lane_offset, int16_t shortCircuit),
- // void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num), void
- // (*kmp_ListToGlobalCpyFctPtr)(void *buffer, int idx, void *reduce_data),
- // void (*kmp_GlobalToListCpyFctPtr)(void *buffer, int idx,
- // void *reduce_data), void (*kmp_GlobalToListCpyPtrsFctPtr)(void *buffer,
- // int idx, void *reduce_data), void (*kmp_GlobalToListRedFctPtr)(void
- // *buffer, int idx, void *reduce_data));
- llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty,
- CGM.Int16Ty, CGM.Int16Ty};
- auto *ShuffleReduceFnTy =
- llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams,
- /*isVarArg=*/false);
- llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty};
- auto *InterWarpCopyFnTy =
- llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams,
- /*isVarArg=*/false);
- llvm::Type *GlobalListTypeParams[] = {CGM.VoidPtrTy, CGM.IntTy,
- CGM.VoidPtrTy};
- auto *GlobalListFnTy =
- llvm::FunctionType::get(CGM.VoidTy, GlobalListTypeParams,
- /*isVarArg=*/false);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
- CGM.Int32Ty,
- CGM.VoidPtrTy,
- CGM.Int32Ty,
- CGM.VoidPtrTy,
- ShuffleReduceFnTy->getPointerTo(),
- InterWarpCopyFnTy->getPointerTo(),
- GlobalListFnTy->getPointerTo(),
- GlobalListFnTy->getPointerTo(),
- GlobalListFnTy->getPointerTo(),
- GlobalListFnTy->getPointerTo()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_nvptx_teams_reduce_nowait_v2");
- break;
- }
- case OMPRTL_NVPTX__kmpc_data_sharing_init_stack: {
- /// Build void __kmpc_data_sharing_init_stack();
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack");
- break;
- }
- case OMPRTL_NVPTX__kmpc_data_sharing_init_stack_spmd: {
- /// Build void __kmpc_data_sharing_init_stack_spmd();
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
- RTLFn =
- CGM.CreateRuntimeFunction(FnTy, "__kmpc_data_sharing_init_stack_spmd");
- break;
- }
- case OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack: {
- // Build void *__kmpc_data_sharing_coalesced_push_stack(size_t size,
- // int16_t UseSharedMemory);
- llvm::Type *TypeParams[] = {CGM.SizeTy, CGM.Int16Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(
- FnTy, /*Name=*/"__kmpc_data_sharing_coalesced_push_stack");
- break;
- }
- case OMPRTL_NVPTX__kmpc_data_sharing_pop_stack: {
- // Build void __kmpc_data_sharing_pop_stack(void *a);
- llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy,
- /*Name=*/"__kmpc_data_sharing_pop_stack");
- break;
- }
- case OMPRTL_NVPTX__kmpc_begin_sharing_variables: {
- /// Build void __kmpc_begin_sharing_variables(void ***args,
- /// size_t n_args);
- llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo(), CGM.SizeTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_begin_sharing_variables");
- break;
- }
- case OMPRTL_NVPTX__kmpc_end_sharing_variables: {
- /// Build void __kmpc_end_sharing_variables();
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, llvm::None, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_sharing_variables");
- break;
- }
- case OMPRTL_NVPTX__kmpc_get_shared_variables: {
- /// Build void __kmpc_get_shared_variables(void ***GlobalArgs);
- llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy->getPointerTo()};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_shared_variables");
- break;
- }
- case OMPRTL_NVPTX__kmpc_parallel_level: {
- // Build uint16_t __kmpc_parallel_level(ident_t *loc, kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.Int16Ty, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_parallel_level");
- break;
- }
- case OMPRTL_NVPTX__kmpc_is_spmd_exec_mode: {
- // Build int8_t __kmpc_is_spmd_exec_mode();
- auto *FnTy = llvm::FunctionType::get(CGM.Int8Ty, /*isVarArg=*/false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_is_spmd_exec_mode");
- break;
- }
- case OMPRTL_NVPTX__kmpc_get_team_static_memory: {
- // Build void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
- // const void *buf, size_t size, int16_t is_shared, const void **res);
- llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.VoidPtrTy, CGM.SizeTy,
- CGM.Int16Ty, CGM.VoidPtrPtrTy};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_get_team_static_memory");
- break;
- }
- case OMPRTL_NVPTX__kmpc_restore_team_static_memory: {
- // Build void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
- // int16_t is_shared);
- llvm::Type *TypeParams[] = {CGM.Int16Ty, CGM.Int16Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
- RTLFn =
- CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory");
- break;
- }
- case OMPRTL__kmpc_barrier: {
- // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
- cast<llvm::Function>(RTLFn.getCallee())
- ->addFnAttr(llvm::Attribute::Convergent);
- break;
- }
- case OMPRTL__kmpc_barrier_simple_spmd: {
- // Build void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
- // global_tid);
- llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
- auto *FnTy =
- llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
- RTLFn =
- CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier_simple_spmd");
- cast<llvm::Function>(RTLFn.getCallee())
- ->addFnAttr(llvm::Attribute::Convergent);
- break;
- }
- }
- return RTLFn;
-}
-
void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
@@ -2084,12 +1699,12 @@
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
llvm::Value *ThreadID = getThreadID(CGF, Loc);
llvm::Value *PL = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
{RTLoc, ThreadID});
IsTTD = Bld.CreateIsNull(PL);
}
llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
@@ -2123,7 +1738,7 @@
llvm::Value *GlobalRecordSizeArg[] = {
Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(
+ createTargetRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
GlobalRecordSizeArg);
GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
@@ -2186,7 +1801,7 @@
CGM.Int16Ty,
getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD ? 1 : 0),
StaticGlobalized, Ld, IsInSharedMemory, ResAddr};
- CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ CGF.EmitRuntimeCall(createTargetRuntimeFunction(
OMPRTL_NVPTX__kmpc_get_team_static_memory),
GlobalRecordSizeArg);
GlobalizedRecords.back().Buffer = StaticGlobalized;
@@ -2213,7 +1828,7 @@
llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(
+ createTargetRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
GlobalRecordSizeArg);
GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
@@ -2312,7 +1927,7 @@
llvm::Value *GlobalRecordSizeArg[] = {
Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(
+ createTargetRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_coalesced_push_stack),
GlobalRecordSizeArg);
llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
@@ -2341,7 +1956,7 @@
for (llvm::Value *Addr :
llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) {
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
Addr);
}
if (I->getSecond().GlobalRecordAddr) {
@@ -2356,7 +1971,7 @@
(void)ApplyDebugLocation::CreateEmpty(CGF);
CGF.EmitBlock(NonSPMDBB);
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(
+ createTargetRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr));
CGF.EmitBlock(ExitBB);
@@ -2378,12 +1993,12 @@
getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD ? 1 : 0),
IsInSharedMemory};
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(
+ createTargetRuntimeFunction(
OMPRTL_NVPTX__kmpc_restore_team_static_memory),
Args);
}
} else {
- CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ CGF.EmitRuntimeCall(createTargetRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
I->getSecond().GlobalRecordAddr);
}
@@ -2436,9 +2051,9 @@
llvm::Value *Args[] = {RTLoc, ThreadID};
NVPTXActionTy Action(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
Args,
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
Args);
RCG.setAction(Action);
RCG(CGF);
@@ -2454,7 +2069,7 @@
// Prepare for parallel region. Indicate the outlined function.
llvm::Value *Args[] = {ID, /*RequiresOMPRuntime=*/Bld.getInt16(1)};
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
Args);
// Create a private scope that will globalize the arguments
@@ -2471,7 +2086,7 @@
llvm::Value *DataSharingArgs[] = {
SharedArgsPtr,
llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
- CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
+ CGF.EmitRuntimeCall(createTargetRuntimeFunction(
OMPRTL_NVPTX__kmpc_begin_sharing_variables),
DataSharingArgs);
@@ -2508,7 +2123,7 @@
if (!CapturedVars.empty())
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_sharing_variables));
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_end_sharing_variables));
// Remember for post-processing in worker loop.
Work.emplace_back(WFn);
@@ -2533,7 +2148,7 @@
llvm::BasicBlock *ParallelCheckBB = CGF.createBasicBlock(".parcheck");
llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
Bld.CreateCondBr(IsSPMD, SeqBB, ParallelCheckBB);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
@@ -2541,7 +2156,7 @@
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
llvm::Value *ThreadID = getThreadID(CGF, Loc);
llvm::Value *PL = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
{RTLoc, ThreadID});
llvm::Value *Res = Bld.CreateIsNotNull(PL);
Bld.CreateCondBr(Res, SeqBB, MasterBB);
@@ -2603,9 +2218,9 @@
llvm::Value *Args[] = {RTLoc, ThreadID};
NVPTXActionTy Action(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
Args,
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
Args);
RCG.setAction(Action);
RCG(CGF);
@@ -2636,7 +2251,7 @@
cast<llvm::PointerType>(getIdentTyPointerTy())),
llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)};
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args);
+ createTargetRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args);
}
void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF,
@@ -2650,7 +2265,7 @@
unsigned Flags = getDefaultFlagsForBarriers(Kind);
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
getThreadID(CGF, Loc)};
- CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args);
+ CGF.EmitRuntimeCall(createTargetRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
void CGOpenMPRuntimeNVPTX::emitCriticalRegion(
@@ -2755,9 +2370,10 @@
assert(Size.getQuantity() <= 8 &&
"Unsupported bitwidth in shuffle instruction.");
- OpenMPRTLFunctionNVPTX ShuffleFn = Size.getQuantity() <= 4
- ? OMPRTL_NVPTX__kmpc_shuffle_int32
- : OMPRTL_NVPTX__kmpc_shuffle_int64;
+ OpenMPRTLFunctionNVPTX ShuffleFn =
+ Size.getQuantity() <= 4
+ ? CGOpenMPRuntimeTarget::OMPRTL_NVPTX__kmpc_shuffle_int32
+ : CGOpenMPRuntimeTarget::OMPRTL_NVPTX__kmpc_shuffle_int64;
// Cast all types to 32- or 64-bit values before calling shuffle routines.
QualType CastTy = CGF.getContext().getIntTypeForBitwidth(
@@ -2767,7 +2383,7 @@
Bld.CreateIntCast(getNVPTXWarpSize(CGF), CGM.Int16Ty, /*isSigned=*/true);
llvm::Value *ShuffledVal = CGF.EmitRuntimeCall(
- RT.createNVPTXRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize});
+ RT.createTargetRuntimeFunction(ShuffleFn), {ElemCast, Offset, WarpSize});
return castValueToType(CGF, ShuffledVal, CastTy, ElemType, Loc);
}
@@ -4246,7 +3862,7 @@
InterWarpCopyFn};
Res = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(
+ createTargetRuntimeFunction(
OMPRTL_NVPTX__kmpc_nvptx_parallel_reduce_nowait_v2),
Args);
} else {
@@ -4296,7 +3912,7 @@
BufferToGlobalRedFn};
Res = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(
+ createTargetRuntimeFunction(
OMPRTL_NVPTX__kmpc_nvptx_teams_reduce_nowait_v2),
Args);
}
@@ -4332,7 +3948,7 @@
RegionCodeGenTy RCG(CodeGen);
NVPTXActionTy Action(
nullptr, llvm::None,
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait),
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_end_reduce_nowait),
EndArgs);
RCG.setAction(Action);
RCG(CGF);
@@ -4495,7 +4111,7 @@
llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer();
llvm::Value *DataSharingArgs[] = {GlobalArgsPtr};
CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_get_shared_variables),
+ createTargetRuntimeFunction(OMPRTL_NVPTX__kmpc_get_shared_variables),
DataSharingArgs);
// Retrieve the shared variables from the list of references returned
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits