Author: abataev Date: Wed May 2 13:03:27 2018 New Revision: 331393 URL: http://llvm.org/viewvc/llvm-project?rev=331393&view=rev Log: [OPENMP] Add support for reductions on simd directives in target regions.
Added codegen for `simd reduction()` constructs in target directives. Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp cfe/trunk/test/OpenMP/nvptx_target_simd_codegen.cpp Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=331393&r1=331392&r2=331393&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original) +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Wed May 2 13:03:27 2018 @@ -61,6 +61,12 @@ enum OpenMPRTLFunctionNVPTX { /// lane_offset, int16_t shortCircuit), /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); OMPRTL_NVPTX__kmpc_parallel_reduce_nowait, + /// \brief Call to __kmpc_nvptx_simd_reduce_nowait(kmp_int32 + /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data, + /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + /// lane_offset, int16_t shortCircuit), + /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); + OMPRTL_NVPTX__kmpc_simd_reduce_nowait, /// \brief Call to __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid, /// int32_t num_vars, size_t reduce_size, void *reduce_data, /// void (*kmp_ShuffleReductFctPtr)(void *rhs, int16_t lane_id, int16_t @@ -1028,6 +1034,33 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntime FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait"); break; } + case OMPRTL_NVPTX__kmpc_simd_reduce_nowait: { + // Build int32_t kmpc_nvptx_simd_reduce_nowait(kmp_int32 global_tid, + // kmp_int32 num_vars, size_t reduce_size, void* reduce_data, + // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + // lane_offset, int16_t Algorithm Version), + // void (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num)); + llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, + CGM.Int16Ty, CGM.Int16Ty}; + auto *ShuffleReduceFnTy = + llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, + /*isVarArg=*/false); + llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; + auto *InterWarpCopyFnTy = + llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, + /*isVarArg=*/false); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.SizeTy, + CGM.VoidPtrTy, + ShuffleReduceFnTy->getPointerTo(), + InterWarpCopyFnTy->getPointerTo()}; + auto *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_nvptx_simd_reduce_nowait"); + break; + } case OMPRTL_NVPTX__kmpc_teams_reduce_nowait: { // Build int32_t __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid, // int32_t num_vars, size_t reduce_size, void *reduce_data, @@ -2703,8 +2736,8 @@ void CGOpenMPRuntimeNVPTX::emitReduction bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind); bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind); - // FIXME: Add support for simd reduction. - assert((TeamsReduction || ParallelReduction) && + bool SimdReduction = isOpenMPSimdDirective(Options.ReductionKind); + assert((TeamsReduction || ParallelReduction || SimdReduction) && "Invalid reduction selection in emitReduction."); ASTContext &C = CGM.getContext(); @@ -2764,19 +2797,22 @@ void CGOpenMPRuntimeNVPTX::emitReduction llvm::Value *InterWarpCopyFn = emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc); - llvm::Value *Res = nullptr; - if (ParallelReduction) { - llvm::Value *Args[] = {ThreadId, - CGF.Builder.getInt32(RHSExprs.size()), - ReductionArrayTySize, - RL, - ShuffleAndReduceFn, - InterWarpCopyFn}; + llvm::Value *Args[] = {ThreadId, + CGF.Builder.getInt32(RHSExprs.size()), + ReductionArrayTySize, + RL, + ShuffleAndReduceFn, + InterWarpCopyFn}; + llvm::Value *Res = nullptr; + if (ParallelReduction) Res = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_reduce_nowait), Args); - } + else if (SimdReduction) + Res = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_simd_reduce_nowait), + Args); if (TeamsReduction) { llvm::Value *ScratchPadCopyFn = Modified: cfe/trunk/test/OpenMP/nvptx_target_simd_codegen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_simd_codegen.cpp?rev=331393&r1=331392&r2=331393&view=diff ============================================================================== --- cfe/trunk/test/OpenMP/nvptx_target_simd_codegen.cpp (original) +++ cfe/trunk/test/OpenMP/nvptx_target_simd_codegen.cpp Wed May 2 13:03:27 2018 @@ -9,9 +9,10 @@ #define HEADER // Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode. -// CHECK-DAG: {{@__omp_offloading_.+l24}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l29}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l34}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l25}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l35}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l40}}_exec_mode = weak constant i8 0 #define N 1000 @@ -20,14 +21,14 @@ tx ftemplate(int n) { tx a[N]; short aa[N]; tx b[10]; - + #pragma omp target simd for(int i = 0; i < n; i++) { a[i] = 1; } #pragma omp target simd - for(int i = 0; i < n; i++) { + for (int i = 0; i < n; i++) { aa[i] += 1; } @@ -36,6 +37,11 @@ tx ftemplate(int n) { b[i] += 1; } + #pragma omp target simd reduction(+:n) + for(int i = 0; i < 10; i++) { + b[i] += 1; + } + return a[0]; } @@ -47,7 +53,7 @@ int bar(int n){ return a; } -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l24}}( +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l25}}( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], // CHECK-NOT: call void @__kmpc_for_static_init @@ -55,7 +61,7 @@ int bar(int n){ // CHECK: call void @__kmpc_spmd_kernel_deinit() // CHECK: ret void -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l29}}( +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l30}}( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], // CHECK-NOT: call void @__kmpc_for_static_init @@ -63,7 +69,7 @@ int bar(int n){ // CHECK: call void @__kmpc_spmd_kernel_deinit() // CHECK: ret void -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l34}}( +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l35}}( // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], // CHECK-NOT: call void @__kmpc_for_static_init @@ -71,4 +77,16 @@ int bar(int n){ // CHECK: call void @__kmpc_spmd_kernel_deinit() // CHECK: ret void +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l40}}( +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK-NOT: call void @__kmpc_for_static_init +// CHECK-NOT: call void @__kmpc_for_static_fini +// CHECK: [[RES:%.+]] = call i32 @__kmpc_nvptx_simd_reduce_nowait(i32 %{{.+}}, i32 1, i{{64|32}} {{8|4}}, i8* %{{.+}}, void (i8*, i16, i16, i16)* @{{.+}}, void (i8*, i32)* @{{.+}}) +// CHECK: switch i32 [[RES]] +// CHECK: call void @__kmpc_nvptx_end_reduce_nowait(i32 %{{.+}}) +// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: ret void + + #endif _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits