llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-codegen Author: Johannes Doerfert (jdoerfert) <details> <summary>Changes</summary> We now provide the information about the min/max thread and team count from to the OMPIRBuilder, no matter what the source was. That means we unify `thread_limit`, `num_teams`, `num_threads` handling with the target specific attriutes (`__launch_bounds__` and `amdgpu_flat_work_group_size`). This is in preparation to pass the values to the runtime, and to allow the middle-end (OpenMP-opt) to tighten the values if it seems appropriate. There is no "real" change after this commit. --- Patch is 339.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70247.diff 12 Files Affected: - (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+54-27) - (modified) clang/lib/CodeGen/CGOpenMPRuntime.h (+2-1) - (modified) clang/lib/CodeGen/CodeGenModule.h (+11-3) - (modified) clang/lib/CodeGen/Targets/AMDGPU.cpp (+8-2) - (modified) clang/lib/CodeGen/Targets/NVPTX.cpp (+33-13) - (modified) clang/test/OpenMP/ompx_attributes_codegen.cpp (+22-12) - (modified) clang/test/OpenMP/target_parallel_codegen.cpp (+48-48) - (modified) clang/test/OpenMP/target_parallel_for_codegen.cpp (+192-192) - (modified) clang/test/OpenMP/target_parallel_for_simd_codegen.cpp (+796-796) - (modified) clang/test/OpenMP/thread_limit_nvptx.c (+4-4) - (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+36-12) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+117-55) ``````````diff diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index aae1a0ea250eea2..9b7ff5f66f2f50d 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6021,15 +6021,46 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( }; // Get NumTeams and ThreadLimit attributes - int32_t DefaultValTeams = -1; - uint32_t DefaultValThreads = UINT32_MAX; - getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); - getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads, + int32_t DefaultValMinTeams = 1; + int32_t DefaultValMaxTeams = -1; + uint32_t DefaultValMinThreads = 1; + uint32_t DefaultValMaxThreads = UINT32_MAX; + + getNumTeamsExprForTargetDirective(CGF, D, DefaultValMinTeams, + DefaultValMaxTeams); + getNumThreadsExprForTargetDirective(CGF, D, DefaultValMaxThreads, /*UpperBoundOnly=*/true); - OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, - DefaultValTeams, DefaultValThreads, - IsOffloadEntry, OutlinedFn, OutlinedFnID); + for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) { + for (auto *A : C->getAttrs()) { + int32_t MinThreadsVal = 1, MaxThreadsVal = 0; + int32_t MinBlocksVal = 1, MaxBlocksVal = -1; + if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A)) + CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &MaxThreadsVal, + &MinBlocksVal, &MaxBlocksVal); + else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A)) + CGM.handleAMDGPUFlatWorkGroupSizeAttr( + nullptr, Attr, /*ReqdWGS=*/nullptr, &MinThreadsVal, &MaxThreadsVal); + else + continue; + + DefaultValMinThreads = + std::max(DefaultValMinThreads, uint32_t(MinThreadsVal)); + DefaultValMaxThreads = + DefaultValMaxThreads + ? std::min(DefaultValMaxThreads, uint32_t(MaxThreadsVal)) + : MaxThreadsVal; + DefaultValMinTeams = DefaultValMinTeams + ? std::max(DefaultValMinTeams, MinBlocksVal) + : MinBlocksVal; + DefaultValMaxTeams = std::min(DefaultValMaxTeams, MaxBlocksVal); + } + } + + OMPBuilder.emitTargetRegionFunction( + EntryInfo, GenerateOutlinedFunction, DefaultValMinTeams, + DefaultValMaxTeams, DefaultValMinThreads, DefaultValMaxThreads, + IsOffloadEntry, OutlinedFn, OutlinedFnID); if (!OutlinedFn) return; @@ -6038,14 +6069,8 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) { for (auto *A : C->getAttrs()) { - if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A)) - CGM.handleCUDALaunchBoundsAttr(OutlinedFn, Attr); - else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A)) - CGM.handleAMDGPUFlatWorkGroupSizeAttr(OutlinedFn, Attr); - else if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A)) + if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A)) CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr); - else - llvm_unreachable("Unexpected attribute kind"); } } } @@ -6103,8 +6128,8 @@ const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, } const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( - CodeGenFunction &CGF, const OMPExecutableDirective &D, - int32_t &DefaultVal) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, + int32_t &MaxTeamsVal) { OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); assert(isOpenMPTargetExecutionDirective(DirectiveKind) && @@ -6125,22 +6150,22 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( if (NumTeams->isIntegerConstantExpr(CGF.getContext())) if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) - DefaultVal = Constant->getExtValue(); + MinTeamsVal = MaxTeamsVal = Constant->getExtValue(); return NumTeams; } - DefaultVal = 0; + MinTeamsVal = MaxTeamsVal = 0; return nullptr; } if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { - DefaultVal = 1; + MinTeamsVal = MaxTeamsVal = 1; return nullptr; } - DefaultVal = 1; + MinTeamsVal = MaxTeamsVal = 1; return nullptr; } // A value of -1 is used to check if we need to emit no teams region - DefaultVal = -1; + MinTeamsVal = MaxTeamsVal = -1; return nullptr; } case OMPD_target_teams_loop: @@ -6154,10 +6179,10 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); if (NumTeams->isIntegerConstantExpr(CGF.getContext())) if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) - DefaultVal = Constant->getExtValue(); + MinTeamsVal = MaxTeamsVal = Constant->getExtValue(); return NumTeams; } - DefaultVal = 0; + MinTeamsVal = MaxTeamsVal = 0; return nullptr; } case OMPD_target_parallel: @@ -6165,7 +6190,7 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( case OMPD_target_parallel_for_simd: case OMPD_target_parallel_loop: case OMPD_target_simd: - DefaultVal = 1; + MinTeamsVal = MaxTeamsVal = 1; return nullptr; case OMPD_parallel: case OMPD_for: @@ -6240,8 +6265,9 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( "Clauses associated with the teams directive expected to be emitted " "only for the host!"); CGBuilderTy &Bld = CGF.Builder; - int32_t DefaultNT = -1; - const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT); + int32_t MinNT = -1, MaxNT = -1; + const Expr *NumTeams = + getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT); if (NumTeams != nullptr) { OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); @@ -6271,7 +6297,8 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( } } - return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT); + assert(MinNT == MaxNT && "Num threads ranges require handling here."); + return llvm::ConstantInt::get(CGF.Int32Ty, MinNT); } /// Check for a num threads constant value (stored in \p DefaultVal), or diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 74b528d6cd7f8cc..d2f922da3320924 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -637,7 +637,8 @@ class CGOpenMPRuntime { /// Otherwise, return nullptr. const Expr *getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, - int32_t &DefaultVal); + int32_t &MinTeamsVal, + int32_t &MaxTeamsVal); llvm::Value *emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D); diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 073b471c6e3cc11..793861f23b15f95 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1543,15 +1543,23 @@ class CodeGenModule : public CodeGenTypeCache { void moveLazyEmissionStates(CodeGenModule *NewBuilder); /// Emit the IR encoding to attach the CUDA launch bounds attribute to \p F. + /// If \p MaxThreadsVal is not nullptr, the max threads value is stored in it, + /// if a valid one was found. void handleCUDALaunchBoundsAttr(llvm::Function *F, - const CUDALaunchBoundsAttr *A); + const CUDALaunchBoundsAttr *A, + int32_t *MaxThreadsVal = nullptr, + int32_t *MinBlocksVal = nullptr, + int32_t *MaxClusterRankVal = nullptr); /// Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute /// to \p F. Alternatively, the work group size can be taken from a \p - /// ReqdWGS. + /// ReqdWGS. If \p MinThreadsVal is not nullptr, the min threads value is + /// stored in it, if a valid one was found. If \p MaxThreadsVal is not + /// nullptr, the max threads value is stored in it, if a valid one was found. void handleAMDGPUFlatWorkGroupSizeAttr( llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A, - const ReqdWorkGroupSizeAttr *ReqdWGS = nullptr); + const ReqdWorkGroupSizeAttr *ReqdWGS = nullptr, + int32_t *MinThreadsVal = nullptr, int32_t *MaxThreadsVal = nullptr); /// Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to \p F. void handleAMDGPUWavesPerEUAttr(llvm::Function *F, diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index f6a614b3e4d54dd..0411846cf9b02bd 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -594,7 +594,8 @@ llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel( void CodeGenModule::handleAMDGPUFlatWorkGroupSizeAttr( llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *FlatWGS, - const ReqdWorkGroupSizeAttr *ReqdWGS) { + const ReqdWorkGroupSizeAttr *ReqdWGS, int32_t *MinThreadsVal, + int32_t *MaxThreadsVal) { unsigned Min = 0; unsigned Max = 0; if (FlatWGS) { @@ -607,8 +608,13 @@ void CodeGenModule::handleAMDGPUFlatWorkGroupSizeAttr( if (Min != 0) { assert(Min <= Max && "Min must be less than or equal Max"); + if (MinThreadsVal) + *MinThreadsVal = Min; + if (MaxThreadsVal) + *MaxThreadsVal = Max; std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max); - F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); + if (F) + F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); } else assert(Max == 0 && "Max must be zero"); } diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 64d019a10514d60..9057cc2178e19de 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -9,6 +9,7 @@ #include "ABIInfoImpl.h" #include "TargetInfo.h" #include "llvm/IR/IntrinsicsNVPTX.h" +#include <cstdint> using namespace clang; using namespace clang::CodeGen; @@ -287,14 +288,23 @@ bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { } } -void CodeGenModule::handleCUDALaunchBoundsAttr( - llvm::Function *F, const CUDALaunchBoundsAttr *Attr) { +void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F, + const CUDALaunchBoundsAttr *Attr, + int32_t *MaxThreadsVal, + int32_t *MinBlocksVal, + int32_t *MaxClusterRankVal) { // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node llvm::APSInt MaxThreads(32); MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext()); - if (MaxThreads > 0) - NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx", - MaxThreads.getExtValue()); + if (MaxThreads > 0) { + if (MaxThreadsVal) + *MaxThreadsVal = MaxThreads.getExtValue(); + if (F) { + // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node + NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx", + MaxThreads.getExtValue()); + } + } // min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it // was not specified in __launch_bounds__ or if the user specified a 0 value, @@ -302,18 +312,28 @@ void CodeGenModule::handleCUDALaunchBoundsAttr( if (Attr->getMinBlocks()) { llvm::APSInt MinBlocks(32); MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext()); - if (MinBlocks > 0) - // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node - NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm", - MinBlocks.getExtValue()); + if (MinBlocks > 0) { + if (MinBlocksVal) + *MinBlocksVal = MinBlocks.getExtValue(); + if (F) { + // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node + NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm", + MinBlocks.getExtValue()); + } + } } if (Attr->getMaxBlocks()) { llvm::APSInt MaxBlocks(32); MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext()); - if (MaxBlocks > 0) - // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node - NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank", - MaxBlocks.getExtValue()); + if (MaxBlocks > 0) { + if (MaxClusterRankVal) + *MaxClusterRankVal = MaxBlocks.getExtValue(); + if (F) { + // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node + NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank", + MaxBlocks.getExtValue()); + } + } } } diff --git a/clang/test/OpenMP/ompx_attributes_codegen.cpp b/clang/test/OpenMP/ompx_attributes_codegen.cpp index 21e9805cbe8293b..bcf524b464aef5f 100644 --- a/clang/test/OpenMP/ompx_attributes_codegen.cpp +++ b/clang/test/OpenMP/ompx_attributes_codegen.cpp @@ -1,16 +1,17 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -// RUN: %clang_cc1 -target-cpu gfx900 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=AMD +// RUN: %clang_cc1 -target-cpu gfx900 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=AMD +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64 -fopenmp-targets=nvptx64 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=NVIDIA // expected-no-diagnostics // Check that the target attributes are set on the generated kernel void func() { - // CHECK: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l15() #0 - // CHECK: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l17() - // CHECK: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l19() #4 + // AMD: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l16() #0 + // AMD: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l18() + // AMD: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l20() #4 #pragma omp target ompx_attribute([[clang::amdgpu_flat_work_group_size(10, 20)]]) {} @@ -21,11 +22,20 @@ void func() { {} } -// CHECK: attributes #0 -// CHECK-SAME: "amdgpu-flat-work-group-size"="10,20" -// CHECK: attributes #4 -// CHECK-SAME: "amdgpu-flat-work-group-size"="3,17" -// CHECK-SAME: "amdgpu-waves-per-eu"="3,7" +// AMD: attributes #0 +// AMD-SAME: "amdgpu-flat-work-group-size"="10,20" +// AMD-SAME: "omp_target_thread_limit"="20" +// AMD: "omp_target_thread_limit"="45" +// AMD: attributes #4 +// AMD-SAME: "amdgpu-flat-work-group-size"="3,17" +// AMD-SAME: "amdgpu-waves-per-eu"="3,7" +// AMD-SAME: "omp_target_thread_limit"="17" -// CHECK: !{ptr @__omp_offloading[[HASH]]_l17, !"maxntidx", i32 45} -// CHECK: !{ptr @__omp_offloading[[HASH]]_l17, !"minctasm", i32 90} +// It is unclear if we should use the AMD annotations for other targets, we do for now. +// NVIDIA: "omp_target_thread_limit"="20" +// NVIDIA: "omp_target_thread_limit"="45" +// NVIDIA: "omp_target_thread_limit"="17" +// NVIDIA: !{ptr @__omp_offloading[[HASH1:.*]]_l16, !"maxntidx", i32 20} +// NVIDIA: !{ptr @__omp_offloading[[HASH2:.*]]_l18, !"minctasm", i32 90} +// NVIDIA: !{ptr @__omp_offloading[[HASH2]]_l18, !"maxntidx", i32 45} +// NVIDIA: !{ptr @__omp_offloading[[HASH3:.*]]_l20, !"maxntidx", i32 17} diff --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp index df8a2c878760c1d..c8af38e32e638e6 100644 --- a/clang/test/OpenMP/target_parallel_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_codegen.cpp @@ -603,42 +603,42 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !21 -// CHECK1-NEXT: store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 -// CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !21 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !21 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !21 -// CHECK1-NEXT: store i32 2, ptr [[KERNEL_ARGS_I]], align 4, !noalias !21 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META31:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !35 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !35 +// CHECK1-NEXT: store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !35 +// CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !35 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !35 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !35 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !35 +// CHECK1-NEXT: store i32 2, ptr [[KERNEL_ARGS_I]], align 4, !noalias !35 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 -// CHECK1-NEXT: store i32 0, ptr [[TMP9]], align 4, !noalias !21 +// CHECK1-NEXT: store i32 0, ptr [[TMP9]], align 4, !noalias !35 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP10]], align 8, !noalias !21 +// CHECK1-NEXT: store ptr null, ptr [[TMP10]], align 8, !noalias !35 // ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/70247 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits