https://github.com/shiltian created https://github.com/llvm/llvm-project/pull/176660
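For reference: the check lines added below are generated by LLVM's
utils/update_cc_test_checks.py, which replays the UTC_ARGS recorded in the
test's NOTE line on every rerun. A minimal sketch of the regeneration
command (the python binary and the build directory passed to --llvm-bin are
assumptions, not part of this patch):

  # Hypothetical build path; --llvm-bin must point at a build that contains clang.
  python3 llvm/utils/update_cc_test_checks.py --llvm-bin=build/bin \
      clang/test/OpenMP/amdgcn-attributes.cpp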
From a8a3698e814ee215a56005ad02748258c06975e2 Mon Sep 17 00:00:00 2001
From: Shilei Tian <[email protected]>
Date: Sun, 18 Jan 2026 12:35:01 -0500
Subject: [PATCH] [NFC][Clang][OpenMP] Automatically generate check lines for
 `clang/test/OpenMP/amdgcn-attributes.cpp`

---
 clang/test/OpenMP/amdgcn-attributes.cpp | 366 +++++++++++++++++++++++-
 1 file changed, 356 insertions(+), 10 deletions(-)

diff --git a/clang/test/OpenMP/amdgcn-attributes.cpp b/clang/test/OpenMP/amdgcn-attributes.cpp
index 03f5c31e3157c..39f189eee87d3 100644
--- a/clang/test/OpenMP/amdgcn-attributes.cpp
+++ b/clang/test/OpenMP/amdgcn-attributes.cpp
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" --prefix-filecheck-ir-name VAR --version 6
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
@@ -14,7 +15,6 @@ int callable(int);
 
 // Check that the target attributes are set on the generated kernel
 int func() {
-  // ALL-LABEL: amdgpu_kernel void @__omp_offloading{{.*}} #0
 
   int arr[N];
@@ -27,14 +27,360 @@ int func() {
 }
 
 int callable(int x) {
-  // ALL-LABEL: @_Z8callablei(i32 noundef %x) #2
   return x + 1;
 }
-
-// DEFAULT: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
-// CPU: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" "uniform-work-group-size"="true" }
-// NOIEEE: attributes #0 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,42" "amdgpu-ieee"="false" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "omp_target_thread_limit"="42" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
-
-// DEFAULT: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// CPU: attributes #2 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+cube-insts,+cvt-pknorm-vop2-insts,+dpp,+gfx8-insts,+gfx9-insts,+lerp-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+sad-insts,+wavefrontsize64" }
-// NOIEEE: attributes #2 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// DEFAULT: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// DEFAULT-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21(
+// DEFAULT-SAME: ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEFAULT-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEFAULT-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// DEFAULT-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// DEFAULT-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// DEFAULT-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
+// DEFAULT-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// DEFAULT-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// DEFAULT-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// DEFAULT-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
+// DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
+// DEFAULT-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_kernel_environment to ptr), ptr [[DYN_PTR]])
+// DEFAULT-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// DEFAULT-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]]
+// DEFAULT: [[USER_CODE_ENTRY]]:
+// DEFAULT-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// DEFAULT-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// DEFAULT-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// DEFAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
+// DEFAULT-NEXT: call void @__kmpc_target_deinit()
+// DEFAULT-NEXT: ret void
+// DEFAULT: [[WORKER_EXIT]]:
+// DEFAULT-NEXT: ret void
+//
+//
+// DEFAULT: Function Attrs: convergent noinline norecurse nounwind optnone
+// DEFAULT-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_omp_outlined(
+// DEFAULT-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[ARR:%.*]]) #[[ATTR1:[0-9]+]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEFAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEFAULT-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEFAULT-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// DEFAULT-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// DEFAULT-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
+// DEFAULT-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// DEFAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// DEFAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// DEFAULT-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
+// DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META7]]
+// DEFAULT-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// DEFAULT-NEXT: br label %[[FOR_COND:.*]]
+// DEFAULT: [[FOR_COND]]:
+// DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// DEFAULT-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 100
+// DEFAULT-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// DEFAULT: [[FOR_BODY]]:
+// DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// DEFAULT-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
+// DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
+// DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// DEFAULT-NEXT: [[CALL:%.*]] = call noundef i32 @_Z8callablei(i32 noundef [[TMP3]]) #[[ATTR4:[0-9]+]]
+// DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// DEFAULT-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP4]] to i64
+// DEFAULT-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM1]]
+// DEFAULT-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX2]], align 4
+// DEFAULT-NEXT: br label %[[FOR_INC:.*]]
+// DEFAULT: [[FOR_INC]]:
+// DEFAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// DEFAULT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1
+// DEFAULT-NEXT: store i32 [[INC]], ptr [[I_ASCAST]], align 4
+// DEFAULT-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// DEFAULT: [[FOR_END]]:
+// DEFAULT-NEXT: ret void
+//
+//
+// DEFAULT: Function Attrs: convergent mustprogress noinline nounwind optnone
+// DEFAULT-LABEL: define hidden noundef i32 @_Z8callablei(
+// DEFAULT-SAME: i32 noundef [[X:%.*]]) #[[ATTR2:[0-9]+]] {
+// DEFAULT-NEXT: [[ENTRY:.*:]]
+// DEFAULT-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// DEFAULT-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// DEFAULT-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// DEFAULT-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// DEFAULT-NEXT: store i32 [[X]], ptr [[X_ADDR_ASCAST]], align 4
+// DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR_ASCAST]], align 4
+// DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+// DEFAULT-NEXT: ret i32 [[ADD]]
+//
+//
+// ALL: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// ALL-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21(
+// ALL-SAME: ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
+// ALL-NEXT: [[ENTRY:.*:]]
+// ALL-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// ALL-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// ALL-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// ALL-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// ALL-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// ALL-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
+// ALL-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// ALL-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// ALL-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// ALL-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
+// ALL-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
+// ALL-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_kernel_environment to ptr), ptr [[DYN_PTR]])
+// ALL-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// ALL-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]]
+// ALL: [[USER_CODE_ENTRY]]:
+// ALL-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// ALL-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// ALL-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// ALL-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
+// ALL-NEXT: call void @__kmpc_target_deinit()
+// ALL-NEXT: ret void
+// ALL: [[WORKER_EXIT]]:
+// ALL-NEXT: ret void
+//
+//
+// ALL: Function Attrs: convergent noinline norecurse nounwind optnone
+// ALL-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_omp_outlined(
+// ALL-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[ARR:%.*]]) #[[ATTR1:[0-9]+]] {
+// ALL-NEXT: [[ENTRY:.*:]]
+// ALL-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// ALL-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// ALL-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// ALL-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// ALL-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// ALL-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// ALL-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
+// ALL-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// ALL-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// ALL-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// ALL-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
+// ALL-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META7]]
+// ALL-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// ALL-NEXT: br label %[[FOR_COND:.*]]
+// ALL: [[FOR_COND]]:
+// ALL-NEXT: [[TMP1:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// ALL-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 100
+// ALL-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// ALL: [[FOR_BODY]]:
+// ALL-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// ALL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
+// ALL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
+// ALL-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// ALL-NEXT: [[CALL:%.*]] = call noundef i32 @_Z8callablei(i32 noundef [[TMP3]]) #[[ATTR4:[0-9]+]]
+// ALL-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// ALL-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP4]] to i64
+// ALL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM1]]
+// ALL-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX2]], align 4
+// ALL-NEXT: br label %[[FOR_INC:.*]]
+// ALL: [[FOR_INC]]:
+// ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// ALL-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1
+// ALL-NEXT: store i32 [[INC]], ptr [[I_ASCAST]], align 4
+// ALL-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// ALL: [[FOR_END]]:
+// ALL-NEXT: ret void
+//
+//
+// ALL: Function Attrs: convergent mustprogress noinline nounwind optnone
+// ALL-LABEL: define hidden noundef i32 @_Z8callablei(
+// ALL-SAME: i32 noundef [[X:%.*]]) #[[ATTR2:[0-9]+]] {
+// ALL-NEXT: [[ENTRY:.*:]]
+// ALL-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// ALL-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// ALL-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// ALL-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// ALL-NEXT: store i32 [[X]], ptr [[X_ADDR_ASCAST]], align 4
+// ALL-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR_ASCAST]], align 4
+// ALL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+// ALL-NEXT: ret i32 [[ADD]]
+//
+//
+// CPU: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// CPU-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21(
+// CPU-SAME: ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPU-NEXT: [[ENTRY:.*:]]
+// CPU-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CPU-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CPU-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CPU-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
+// CPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CPU-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CPU-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
+// CPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
+// CPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_kernel_environment to ptr), ptr [[DYN_PTR]])
+// CPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// CPU-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]]
+// CPU: [[USER_CODE_ENTRY]]:
+// CPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// CPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CPU-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
+// CPU-NEXT: call void @__kmpc_target_deinit()
+// CPU-NEXT: ret void
+// CPU: [[WORKER_EXIT]]:
+// CPU-NEXT: ret void
+//
+//
+// CPU: Function Attrs: convergent noinline norecurse nounwind optnone
+// CPU-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_omp_outlined(
+// CPU-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[ARR:%.*]]) #[[ATTR1:[0-9]+]] {
+// CPU-NEXT: [[ENTRY:.*:]]
+// CPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CPU-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CPU-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
+// CPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CPU-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
+// CPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META7]]
+// CPU-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CPU-NEXT: br label %[[FOR_COND:.*]]
+// CPU: [[FOR_COND]]:
+// CPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 100
+// CPU-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CPU: [[FOR_BODY]]:
+// CPU-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
+// CPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
+// CPU-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CPU-NEXT: [[CALL:%.*]] = call noundef i32 @_Z8callablei(i32 noundef [[TMP3]]) #[[ATTR4:[0-9]+]]
+// CPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CPU-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP4]] to i64
+// CPU-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM1]]
+// CPU-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX2]], align 4
+// CPU-NEXT: br label %[[FOR_INC:.*]]
+// CPU: [[FOR_INC]]:
+// CPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1
+// CPU-NEXT: store i32 [[INC]], ptr [[I_ASCAST]], align 4
+// CPU-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// CPU: [[FOR_END]]:
+// CPU-NEXT: ret void
+//
+//
+// CPU: Function Attrs: convergent mustprogress noinline nounwind optnone
+// CPU-LABEL: define hidden noundef i32 @_Z8callablei(
+// CPU-SAME: i32 noundef [[X:%.*]]) #[[ATTR2:[0-9]+]] {
+// CPU-NEXT: [[ENTRY:.*:]]
+// CPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CPU-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CPU-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// CPU-NEXT: store i32 [[X]], ptr [[X_ADDR_ASCAST]], align 4
+// CPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR_ASCAST]], align 4
+// CPU-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+// CPU-NEXT: ret i32 [[ADD]]
+//
+//
+// NOIEEE: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
+// NOIEEE-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21(
+// NOIEEE-SAME: ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
+// NOIEEE-NEXT: [[ENTRY:.*:]]
+// NOIEEE-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// NOIEEE-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// NOIEEE-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// NOIEEE-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// NOIEEE-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// NOIEEE-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
+// NOIEEE-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// NOIEEE-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// NOIEEE-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// NOIEEE-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
+// NOIEEE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
+// NOIEEE-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_kernel_environment to ptr), ptr [[DYN_PTR]])
+// NOIEEE-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// NOIEEE-NEXT: br i1 [[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]]
+// NOIEEE: [[USER_CODE_ENTRY]]:
+// NOIEEE-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// NOIEEE-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// NOIEEE-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// NOIEEE-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]]
+// NOIEEE-NEXT: call void @__kmpc_target_deinit()
+// NOIEEE-NEXT: ret void
+// NOIEEE: [[WORKER_EXIT]]:
+// NOIEEE-NEXT: ret void
+//
+//
+// NOIEEE: Function Attrs: convergent noinline norecurse nounwind optnone
+// NOIEEE-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4funcv_l21_omp_outlined(
+// NOIEEE-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[ARR:%.*]]) #[[ATTR1:[0-9]+]] {
+// NOIEEE-NEXT: [[ENTRY:.*:]]
+// NOIEEE-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// NOIEEE-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// NOIEEE-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// NOIEEE-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// NOIEEE-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// NOIEEE-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// NOIEEE-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
+// NOIEEE-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// NOIEEE-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// NOIEEE-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// NOIEEE-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
+// NOIEEE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META7]]
+// NOIEEE-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// NOIEEE-NEXT: br label %[[FOR_COND:.*]]
+// NOIEEE: [[FOR_COND]]:
+// NOIEEE-NEXT: [[TMP1:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// NOIEEE-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 100
+// NOIEEE-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// NOIEEE: [[FOR_BODY]]:
+// NOIEEE-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// NOIEEE-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
+// NOIEEE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
+// NOIEEE-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// NOIEEE-NEXT: [[CALL:%.*]] = call noundef i32 @_Z8callablei(i32 noundef [[TMP3]]) #[[ATTR4:[0-9]+]]
+// NOIEEE-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// NOIEEE-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP4]] to i64
+// NOIEEE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM1]]
+// NOIEEE-NEXT: store i32 [[CALL]], ptr [[ARRAYIDX2]], align 4
+// NOIEEE-NEXT: br label %[[FOR_INC:.*]]
+// NOIEEE: [[FOR_INC]]:
+// NOIEEE-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// NOIEEE-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1
+// NOIEEE-NEXT: store i32 [[INC]], ptr [[I_ASCAST]], align 4
+// NOIEEE-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// NOIEEE: [[FOR_END]]:
+// NOIEEE-NEXT: ret void
+//
+//
+// NOIEEE: Function Attrs: convergent mustprogress noinline nounwind optnone
+// NOIEEE-LABEL: define hidden noundef i32 @_Z8callablei(
+// NOIEEE-SAME: i32 noundef [[X:%.*]]) #[[ATTR2:[0-9]+]] {
+// NOIEEE-NEXT: [[ENTRY:.*:]]
+// NOIEEE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// NOIEEE-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// NOIEEE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// NOIEEE-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// NOIEEE-NEXT: store i32 [[X]], ptr [[X_ADDR_ASCAST]], align 4
+// NOIEEE-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR_ASCAST]], align 4
+// NOIEEE-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+// NOIEEE-NEXT: ret i32 [[ADD]]
+//
+//.
+// DEFAULT: [[META6]] = !{}
+// DEFAULT: [[META7]] = !{i64 4}
+// DEFAULT: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]]}
+// DEFAULT: [[META9]] = !{!"llvm.loop.mustprogress"}
+//.
+// CPU: [[META6]] = !{}
+// CPU: [[META7]] = !{i64 4}
+// CPU: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]]}
+// CPU: [[META9]] = !{!"llvm.loop.mustprogress"}
+//.
+// NOIEEE: [[META6]] = !{}
+// NOIEEE: [[META7]] = !{i64 4}
+// NOIEEE: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]]}
+// NOIEEE: [[META9]] = !{!"llvm.loop.mustprogress"}
+//.
