https://github.com/vg0204 updated https://github.com/llvm/llvm-project/pull/95750
>From c9652ae9092e2d42268cd7f843d921c16410bfc9 Mon Sep 17 00:00:00 2001 From: vg0204 <vikash.gu...@amd.com> Date: Mon, 17 Jun 2024 11:20:02 +0530 Subject: [PATCH 1/9] [Clang] [WIP] Added builtin_alloca support for OpenCL1.2 and below The __builtin_alloca was returning a flat pointer with no address space when compiled using OpenCL1.2 or below, but worked fine with OpenCL2.0 and above. This is because the latter uses the concept of a generic address space, which supports casts to other address spaces (i.e. to the private address space, which is used for stack allocation). So, in the case of OpenCL1.2 and below, __builtin_alloca is supposed to return a pointer to the private address space to eliminate the need for casting, as casting is not supported there. Thus, it requires redefinition of the builtin function with an appropriate return pointer to the appropriate address space. --- clang/lib/Sema/SemaExpr.cpp | 23 +++++- clang/test/CodeGenOpenCL/builtins-alloca.cl | 86 +++++++++++++++++++++ clang/test/CodeGenOpenCL/memcpy.cl | 0 3 files changed, 106 insertions(+), 3 deletions(-) create mode 100644 clang/test/CodeGenOpenCL/builtins-alloca.cl mode change 100644 => 100755 clang/test/CodeGenOpenCL/memcpy.cl diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 8d24e34520e77..cf4c98fbe2c38 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -6121,7 +6121,10 @@ bool Sema::CheckArgsForPlaceholders(MultiExprArg args) { /// it does not contain any pointer arguments without /// an address space qualifer. Otherwise the rewritten /// FunctionDecl is returned. -/// TODO: Handle pointer return types. +/// +/// Pointer return type with no explicit address space is assigned the +/// default address space where pointer points to based on the language +/// option used to compile it. 
static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context, FunctionDecl *FDecl, MultiExprArg ArgExprs) { @@ -6165,13 +6168,27 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context, OverloadParams.push_back(Context.getPointerType(PointeeType)); } + QualType ReturnTy = FT->getReturnType(); + QualType OverloadReturnTy = ReturnTy; + if (ReturnTy->isPointerType() && + !ReturnTy->getPointeeType().hasAddressSpace()) { + if (Sema->getLangOpts().OpenCL) { + NeedsNewDecl = true; + + QualType ReturnPtTy = ReturnTy->getPointeeType(); + LangAS defClAS = Context.getDefaultOpenCLPointeeAddrSpace(); + ReturnPtTy = Context.getAddrSpaceQualType(ReturnPtTy, defClAS); + OverloadReturnTy = Context.getPointerType(ReturnPtTy); + } + } + if (!NeedsNewDecl) return nullptr; FunctionProtoType::ExtProtoInfo EPI; EPI.Variadic = FT->isVariadic(); - QualType OverloadTy = Context.getFunctionType(FT->getReturnType(), - OverloadParams, EPI); + QualType OverloadTy = + Context.getFunctionType(OverloadReturnTy, OverloadParams, EPI); DeclContext *Parent = FDecl->getParent(); FunctionDecl *OverloadDecl = FunctionDecl::Create( Context, Parent, FDecl->getLocation(), FDecl->getLocation(), diff --git a/clang/test/CodeGenOpenCL/builtins-alloca.cl b/clang/test/CodeGenOpenCL/builtins-alloca.cl new file mode 100644 index 0000000000000..74a86955f2e4f --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-alloca.cl @@ -0,0 +1,86 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 -emit-llvm -o - | FileCheck --check-prefix=OPENCL12 %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 -emit-llvm -o - | FileCheck --check-prefix=OPENCL20 %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -emit-llvm -o - | FileCheck --check-prefix=OPENCL30 %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 
-cl-ext=+__opencl_c_generic_address_space -emit-llvm -o - | FileCheck --check-prefix=OPENCL30-EXT %s + +// OPENCL12-LABEL: define dso_local ptr addrspace(5) @test1( +// OPENCL12-SAME: ) #[[ATTR0:[0-9]+]] { +// OPENCL12-NEXT: [[ENTRY:.*:]] +// OPENCL12-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL12-NEXT: [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL12-NEXT: [[TMP1:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL12-NEXT: ret ptr addrspace(5) [[TMP1]] +// +// OPENCL20-LABEL: define dso_local ptr @test1( +// OPENCL20-SAME: ) #[[ATTR0:[0-9]+]] { +// OPENCL20-NEXT: [[ENTRY:.*:]] +// OPENCL20-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5) +// OPENCL20-NEXT: [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5) +// OPENCL20-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr +// OPENCL20-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8 +// OPENCL20-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[ALLOC_PTR]], align 8 +// OPENCL20-NEXT: ret ptr [[TMP2]] +// +// OPENCL30-LABEL: define dso_local ptr addrspace(5) @test1( +// OPENCL30-SAME: ) #[[ATTR0:[0-9]+]] { +// OPENCL30-NEXT: [[ENTRY:.*:]] +// OPENCL30-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-NEXT: [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL30-NEXT: [[TMP1:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL30-NEXT: ret ptr addrspace(5) [[TMP1]] +// +// OPENCL30-EXT-LABEL: define dso_local ptr @test1( +// OPENCL30-EXT-SAME: ) #[[ATTR0:[0-9]+]] { +// OPENCL30-EXT-NEXT: [[ENTRY:.*:]] +// OPENCL30-EXT-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5) +// OPENCL30-EXT-NEXT: [[TMP0:%.*]] = alloca i8, 
i64 128, align 8, addrspace(5) +// OPENCL30-EXT-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr +// OPENCL30-EXT-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8 +// OPENCL30-EXT-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[ALLOC_PTR]], align 8 +// OPENCL30-EXT-NEXT: ret ptr [[TMP2]] +// +float* test1() { + float* alloc_ptr = (float*)__builtin_alloca(32 * sizeof(int)); + return alloc_ptr; +} + +// OPENCL12-LABEL: define dso_local void @test2( +// OPENCL12-SAME: ) #[[ATTR0]] { +// OPENCL12-NEXT: [[ENTRY:.*:]] +// OPENCL12-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL12-NEXT: [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL12-NEXT: ret void +// +// OPENCL20-LABEL: define dso_local void @test2( +// OPENCL20-SAME: ) #[[ATTR0]] { +// OPENCL20-NEXT: [[ENTRY:.*:]] +// OPENCL20-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5) +// OPENCL20-NEXT: [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5) +// OPENCL20-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr +// OPENCL20-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8 +// OPENCL20-NEXT: ret void +// +// OPENCL30-LABEL: define dso_local void @test2( +// OPENCL30-SAME: ) #[[ATTR0]] { +// OPENCL30-NEXT: [[ENTRY:.*:]] +// OPENCL30-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-NEXT: [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL30-NEXT: ret void +// +// OPENCL30-EXT-LABEL: define dso_local void @test2( +// OPENCL30-EXT-SAME: ) #[[ATTR0]] { +// OPENCL30-EXT-NEXT: [[ENTRY:.*:]] +// OPENCL30-EXT-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5) +// OPENCL30-EXT-NEXT: [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5) +// 
OPENCL30-EXT-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr +// OPENCL30-EXT-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8 +// OPENCL30-EXT-NEXT: ret void +// +void test2() { + void *alloc_ptr = __builtin_alloca(28); +} diff --git a/clang/test/CodeGenOpenCL/memcpy.cl b/clang/test/CodeGenOpenCL/memcpy.cl old mode 100644 new mode 100755 >From 4536fa95abde0b89c163444adb153dc841bea03a Mon Sep 17 00:00:00 2001 From: vg0204 <vikash.gu...@amd.com> Date: Mon, 17 Jun 2024 16:23:03 +0530 Subject: [PATCH 2/9] Updated return pointer to always point to stack/private address space for builtin alloca variants. --- clang/lib/Sema/SemaExpr.cpp | 21 +- clang/test/CodeGenOpenCL/builtins-alloca.cl | 275 ++++++++++++++++---- clang/test/CodeGenOpenCL/memcpy.cl | 0 3 files changed, 242 insertions(+), 54 deletions(-) mode change 100755 => 100644 clang/test/CodeGenOpenCL/memcpy.cl diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index cf4c98fbe2c38..aa976e75db66f 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -6176,7 +6176,26 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context, NeedsNewDecl = true; QualType ReturnPtTy = ReturnTy->getPointeeType(); - LangAS defClAS = Context.getDefaultOpenCLPointeeAddrSpace(); + unsigned BuiltinID = FDecl->getBuiltinID(); + LangAS defClAS; + + // __builtin_alloca* should always return pointer to stack/private + // Address Space, while for other builtins with return pointer type, + // it should depend on the OpenCL version. 
+ switch (BuiltinID) { + case Builtin::BI__builtin_alloca_uninitialized: + case Builtin::BI__builtin_alloca: + case Builtin::BI__builtin_alloca_with_align_uninitialized: + case Builtin::BI__builtin_alloca_with_align: { + defClAS = LangAS::opencl_private; + break; + } + default: { + defClAS = Context.getDefaultOpenCLPointeeAddrSpace(); + break; + } + } + ReturnPtTy = Context.getAddrSpaceQualType(ReturnPtTy, defClAS); OverloadReturnTy = Context.getPointerType(ReturnPtTy); } diff --git a/clang/test/CodeGenOpenCL/builtins-alloca.cl b/clang/test/CodeGenOpenCL/builtins-alloca.cl index 74a86955f2e4f..2df6bf3dba6a3 100644 --- a/clang/test/CodeGenOpenCL/builtins-alloca.cl +++ b/clang/test/CodeGenOpenCL/builtins-alloca.cl @@ -1,86 +1,255 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 -emit-llvm -o - | FileCheck --check-prefix=OPENCL12 %s -// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 -emit-llvm -o - | FileCheck --check-prefix=OPENCL20 %s -// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -emit-llvm -o - | FileCheck --check-prefix=OPENCL30 %s -// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space -emit-llvm -o - | FileCheck --check-prefix=OPENCL30-EXT %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 \ +// RUN: -emit-llvm -o - | FileCheck --check-prefix=OPENCL12 %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 \ +// RUN: -emit-llvm -o - | FileCheck --check-prefix=OPENCL20 %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 \ +// RUN: -emit-llvm -o - | FileCheck --check-prefix=OPENCL30 %s +// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space \ +// RUN: -emit-llvm -o - | FileCheck --check-prefix=OPENCL30-EXT %s -// OPENCL12-LABEL: define dso_local ptr 
addrspace(5) @test1( -// OPENCL12-SAME: ) #[[ATTR0:[0-9]+]] { +// OPENCL12-LABEL: define dso_local void @test1( +// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { // OPENCL12-NEXT: [[ENTRY:.*:]] +// OPENCL12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL12-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL12-NEXT: [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5) -// OPENCL12-NEXT: store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4 -// OPENCL12-NEXT: [[TMP1:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOC_PTR]], align 4 -// OPENCL12-NEXT: ret ptr addrspace(5) [[TMP1]] +// OPENCL12-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL12-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL12-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL12-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL12-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL12-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL12-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[CONV1:%.*]] = zext i32 [[TMP2]] to i64 +// OPENCL12-NEXT: [[MUL2:%.*]] = mul i64 [[CONV1]], 4 +// OPENCL12-NEXT: [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 +// OPENCL12-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[CONV3:%.*]] = zext i32 [[TMP4]] to i64 +// OPENCL12-NEXT: [[MUL4:%.*]] = mul i64 [[CONV3]], 4 +// 
OPENCL12-NEXT: [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 +// OPENCL12-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[CONV5:%.*]] = zext i32 [[TMP6]] to i64 +// OPENCL12-NEXT: [[MUL6:%.*]] = mul i64 [[CONV5]], 4 +// OPENCL12-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 +// OPENCL12-NEXT: ret void // -// OPENCL20-LABEL: define dso_local ptr @test1( -// OPENCL20-SAME: ) #[[ATTR0:[0-9]+]] { +// OPENCL20-LABEL: define dso_local void @test1( +// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { // OPENCL20-NEXT: [[ENTRY:.*:]] -// OPENCL20-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5) -// OPENCL20-NEXT: [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5) -// OPENCL20-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr -// OPENCL20-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8 -// OPENCL20-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[ALLOC_PTR]], align 8 -// OPENCL20-NEXT: ret ptr [[TMP2]] +// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL20-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL20-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL20-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL20-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL20-NEXT: 
[[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL20-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL20-NEXT: [[CONV1:%.*]] = zext i32 [[TMP2]] to i64 +// OPENCL20-NEXT: [[MUL2:%.*]] = mul i64 [[CONV1]], 4 +// OPENCL20-NEXT: [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5) +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 +// OPENCL20-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL20-NEXT: [[CONV3:%.*]] = zext i32 [[TMP4]] to i64 +// OPENCL20-NEXT: [[MUL4:%.*]] = mul i64 [[CONV3]], 4 +// OPENCL20-NEXT: [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5) +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 +// OPENCL20-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL20-NEXT: [[CONV5:%.*]] = zext i32 [[TMP6]] to i64 +// OPENCL20-NEXT: [[MUL6:%.*]] = mul i64 [[CONV5]], 4 +// OPENCL20-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5) +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 +// OPENCL20-NEXT: ret void // -// OPENCL30-LABEL: define dso_local ptr addrspace(5) @test1( -// OPENCL30-SAME: ) #[[ATTR0:[0-9]+]] { +// OPENCL30-LABEL: define dso_local void @test1( +// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { // OPENCL30-NEXT: [[ENTRY:.*:]] +// OPENCL30-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL30-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-NEXT: [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5) -// OPENCL30-NEXT: store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4 -// OPENCL30-NEXT: [[TMP1:%.*]] = load ptr addrspace(5), ptr addrspace(5) [[ALLOC_PTR]], align 4 
-// OPENCL30-NEXT: ret ptr addrspace(5) [[TMP1]] +// OPENCL30-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL30-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL30-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[CONV1:%.*]] = zext i32 [[TMP2]] to i64 +// OPENCL30-NEXT: [[MUL2:%.*]] = mul i64 [[CONV1]], 4 +// OPENCL30-NEXT: [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 +// OPENCL30-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[CONV3:%.*]] = zext i32 [[TMP4]] to i64 +// OPENCL30-NEXT: [[MUL4:%.*]] = mul i64 [[CONV3]], 4 +// OPENCL30-NEXT: [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 +// OPENCL30-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[CONV5:%.*]] = zext i32 [[TMP6]] to i64 +// OPENCL30-NEXT: [[MUL6:%.*]] = mul i64 [[CONV5]], 4 +// OPENCL30-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 +// OPENCL30-NEXT: ret void // -// OPENCL30-EXT-LABEL: define 
dso_local ptr @test1( -// OPENCL30-EXT-SAME: ) #[[ATTR0:[0-9]+]] { +// OPENCL30-EXT-LABEL: define dso_local void @test1( +// OPENCL30-EXT-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { // OPENCL30-EXT-NEXT: [[ENTRY:.*:]] -// OPENCL30-EXT-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5) -// OPENCL30-EXT-NEXT: [[TMP0:%.*]] = alloca i8, i64 128, align 8, addrspace(5) -// OPENCL30-EXT-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr -// OPENCL30-EXT-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8 -// OPENCL30-EXT-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[ALLOC_PTR]], align 8 -// OPENCL30-EXT-NEXT: ret ptr [[TMP2]] +// OPENCL30-EXT-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30-EXT-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-EXT-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-EXT-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-EXT-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-EXT-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-EXT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-EXT-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30-EXT-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL30-EXT-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) +// OPENCL30-EXT-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL30-EXT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-EXT-NEXT: [[CONV1:%.*]] = zext i32 [[TMP2]] to i64 +// OPENCL30-EXT-NEXT: [[MUL2:%.*]] = mul i64 [[CONV1]], 4 +// OPENCL30-EXT-NEXT: [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5) +// OPENCL30-EXT-NEXT: store ptr addrspace(5) [[TMP3]], ptr addrspace(5) 
[[ALLOC_PTR_UNINITIALIZED]], align 4 +// OPENCL30-EXT-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-EXT-NEXT: [[CONV3:%.*]] = zext i32 [[TMP4]] to i64 +// OPENCL30-EXT-NEXT: [[MUL4:%.*]] = mul i64 [[CONV3]], 4 +// OPENCL30-EXT-NEXT: [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5) +// OPENCL30-EXT-NEXT: store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 +// OPENCL30-EXT-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-EXT-NEXT: [[CONV5:%.*]] = zext i32 [[TMP6]] to i64 +// OPENCL30-EXT-NEXT: [[MUL6:%.*]] = mul i64 [[CONV5]], 4 +// OPENCL30-EXT-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5) +// OPENCL30-EXT-NEXT: store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 +// OPENCL30-EXT-NEXT: ret void // -float* test1() { - float* alloc_ptr = (float*)__builtin_alloca(32 * sizeof(int)); - return alloc_ptr; +void test1(unsigned n) { + __private float* alloc_ptr = (__private float*)__builtin_alloca(n*sizeof(int)); + __private float* alloc_ptr_uninitialized = (__private float*)__builtin_alloca_uninitialized(n*sizeof(int)); + __private float* alloc_ptr_align = (__private float*)__builtin_alloca_with_align((n*sizeof(int)), 8); + __private float* alloc_ptr_align_uninitialized = (__private float*)__builtin_alloca_with_align_uninitialized((n*sizeof(int)), 8); } // OPENCL12-LABEL: define dso_local void @test2( -// OPENCL12-SAME: ) #[[ATTR0]] { +// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { // OPENCL12-NEXT: [[ENTRY:.*:]] +// OPENCL12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL12-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL12-NEXT: [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5) -// OPENCL12-NEXT: store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL12-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr 
addrspace(5), align 4, addrspace(5) +// OPENCL12-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL12-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL12-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL12-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL12-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[CONV1:%.*]] = zext i32 [[TMP2]] to i64 +// OPENCL12-NEXT: [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 +// OPENCL12-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[CONV2:%.*]] = zext i32 [[TMP4]] to i64 +// OPENCL12-NEXT: [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 +// OPENCL12-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL12-NEXT: [[CONV3:%.*]] = zext i32 [[TMP6]] to i64 +// OPENCL12-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5) +// OPENCL12-NEXT: store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 // OPENCL12-NEXT: ret void // // OPENCL20-LABEL: define dso_local void @test2( -// OPENCL20-SAME: ) #[[ATTR0]] { +// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { // OPENCL20-NEXT: [[ENTRY:.*:]] -// OPENCL20-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5) -// OPENCL20-NEXT: [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5) -// OPENCL20-NEXT: [[TMP1:%.*]] = 
addrspacecast ptr addrspace(5) [[TMP0]] to ptr -// OPENCL20-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8 +// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL20-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL20-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL20-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL20-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL20-NEXT: [[CONV1:%.*]] = zext i32 [[TMP2]] to i64 +// OPENCL20-NEXT: [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5) +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 +// OPENCL20-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL20-NEXT: [[CONV2:%.*]] = zext i32 [[TMP4]] to i64 +// OPENCL20-NEXT: [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5) +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 +// OPENCL20-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL20-NEXT: [[CONV3:%.*]] = zext i32 [[TMP6]] to i64 +// OPENCL20-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5) +// OPENCL20-NEXT: store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 // OPENCL20-NEXT: ret void // // OPENCL30-LABEL: 
define dso_local void @test2( -// OPENCL30-SAME: ) #[[ATTR0]] { +// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { // OPENCL30-NEXT: [[ENTRY:.*:]] +// OPENCL30-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) // OPENCL30-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-NEXT: [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5) -// OPENCL30-NEXT: store ptr addrspace(5) [[TMP0]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL30-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL30-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[CONV1:%.*]] = zext i32 [[TMP2]] to i64 +// OPENCL30-NEXT: [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 +// OPENCL30-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[CONV2:%.*]] = zext i32 [[TMP4]] to i64 +// OPENCL30-NEXT: [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 +// OPENCL30-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-NEXT: [[CONV3:%.*]] = zext i32 [[TMP6]] to i64 +// OPENCL30-NEXT: [[TMP7:%.*]] = alloca i8, i64 
[[CONV3]], align 1, addrspace(5) +// OPENCL30-NEXT: store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 // OPENCL30-NEXT: ret void // // OPENCL30-EXT-LABEL: define dso_local void @test2( -// OPENCL30-EXT-SAME: ) #[[ATTR0]] { +// OPENCL30-EXT-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { // OPENCL30-EXT-NEXT: [[ENTRY:.*:]] -// OPENCL30-EXT-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr, align 8, addrspace(5) -// OPENCL30-EXT-NEXT: [[TMP0:%.*]] = alloca i8, i64 28, align 8, addrspace(5) -// OPENCL30-EXT-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[TMP0]] to ptr -// OPENCL30-EXT-NEXT: store ptr [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 8 +// OPENCL30-EXT-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL30-EXT-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-EXT-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-EXT-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-EXT-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL30-EXT-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-EXT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-EXT-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL30-EXT-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) +// OPENCL30-EXT-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL30-EXT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-EXT-NEXT: [[CONV1:%.*]] = zext i32 [[TMP2]] to i64 +// OPENCL30-EXT-NEXT: [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5) +// OPENCL30-EXT-NEXT: store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 +// OPENCL30-EXT-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// 
OPENCL30-EXT-NEXT: [[CONV2:%.*]] = zext i32 [[TMP4]] to i64 +// OPENCL30-EXT-NEXT: [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5) +// OPENCL30-EXT-NEXT: store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 +// OPENCL30-EXT-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL30-EXT-NEXT: [[CONV3:%.*]] = zext i32 [[TMP6]] to i64 +// OPENCL30-EXT-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5) +// OPENCL30-EXT-NEXT: store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 // OPENCL30-EXT-NEXT: ret void // -void test2() { - void *alloc_ptr = __builtin_alloca(28); +void test2(unsigned n) { + __private void *alloc_ptr = __builtin_alloca(n); + __private void *alloc_ptr_uninitialized = __builtin_alloca_uninitialized(n); + __private void *alloc_ptr_align = __builtin_alloca_with_align(n, 8);; + __private void *alloc_ptr_align_uninitialized = __builtin_alloca_with_align_uninitialized(n, 8); } diff --git a/clang/test/CodeGenOpenCL/memcpy.cl b/clang/test/CodeGenOpenCL/memcpy.cl old mode 100755 new mode 100644 >From a5d2523bfcba1a873b671382b0aec279068619e5 Mon Sep 17 00:00:00 2001 From: vg0204 <vikash.gu...@amd.com> Date: Wed, 3 Jul 2024 12:36:33 +0530 Subject: [PATCH 3/9] Moved the patch to dedicated builtin function in SemaChecking.cpp & refactored LIT test. 
--- clang/lib/Sema/SemaChecking.cpp | 29 +- clang/lib/Sema/SemaExpr.cpp | 42 +-- clang/test/CodeGenOpenCL/builtins-alloca.cl | 363 +++++++------------- 3 files changed, 157 insertions(+), 277 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 45b9bbb23dbf7..0a354c9bc42f7 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1477,6 +1477,31 @@ static bool BuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall, return false; } +// In OpenCL, __builtin_alloca_* should return a pointer to address space +// that corresponds to the stack address space i.e private address space. +static bool OpenCLBuiltinAllocaAddrSpace(Sema &S, CallExpr *TheCall) { + S.Diag(TheCall->getBeginLoc(), diag::warn_alloca) + << TheCall->getDirectCallee(); + + QualType RT = TheCall->getType(); + if (!RT->isPointerType() || RT->getPointeeType().hasAddressSpace()) + return true; + + if (S.getLangOpts().OpenCL) { + RT = RT->getPointeeType(); + + // __builtin_alloca* should always return pointer to stack/private + // Address Space, while for other builtins with return pointer type, + // it should depend on the OpenCL version. 
+ LangAS openCLStackAS = LangAS::opencl_private; + + RT = S.Context.getAddrSpaceQualType(RT, openCLStackAS); + TheCall->setType(S.Context.getPointerType(RT)); + } + + return false; +} + namespace { enum PointerAuthOpKind { PAO_Strip, @@ -2208,8 +2233,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, [[fallthrough]]; case Builtin::BI__builtin_alloca: case Builtin::BI__builtin_alloca_uninitialized: - Diag(TheCall->getBeginLoc(), diag::warn_alloca) - << TheCall->getDirectCallee(); + if (OpenCLBuiltinAllocaAddrSpace(*this, TheCall)) + return ExprError(); break; case Builtin::BI__arithmetic_fence: if (BuiltinArithmeticFence(TheCall)) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index aa976e75db66f..8d24e34520e77 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -6121,10 +6121,7 @@ bool Sema::CheckArgsForPlaceholders(MultiExprArg args) { /// it does not contain any pointer arguments without /// an address space qualifer. Otherwise the rewritten /// FunctionDecl is returned. -/// -/// Pointer return type with no explicit address space is assigned the -/// default address space where pointer points to based on the language -/// option used to compile it. +/// TODO: Handle pointer return types. 
static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context, FunctionDecl *FDecl, MultiExprArg ArgExprs) { @@ -6168,46 +6165,13 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context, OverloadParams.push_back(Context.getPointerType(PointeeType)); } - QualType ReturnTy = FT->getReturnType(); - QualType OverloadReturnTy = ReturnTy; - if (ReturnTy->isPointerType() && - !ReturnTy->getPointeeType().hasAddressSpace()) { - if (Sema->getLangOpts().OpenCL) { - NeedsNewDecl = true; - - QualType ReturnPtTy = ReturnTy->getPointeeType(); - unsigned BuiltinID = FDecl->getBuiltinID(); - LangAS defClAS; - - // __builtin_alloca* should always return pointer to stack/private - // Address Space, while for other builtins with return pointer type, - // it should depend on the OpenCL version. - switch (BuiltinID) { - case Builtin::BI__builtin_alloca_uninitialized: - case Builtin::BI__builtin_alloca: - case Builtin::BI__builtin_alloca_with_align_uninitialized: - case Builtin::BI__builtin_alloca_with_align: { - defClAS = LangAS::opencl_private; - break; - } - default: { - defClAS = Context.getDefaultOpenCLPointeeAddrSpace(); - break; - } - } - - ReturnPtTy = Context.getAddrSpaceQualType(ReturnPtTy, defClAS); - OverloadReturnTy = Context.getPointerType(ReturnPtTy); - } - } - if (!NeedsNewDecl) return nullptr; FunctionProtoType::ExtProtoInfo EPI; EPI.Variadic = FT->isVariadic(); - QualType OverloadTy = - Context.getFunctionType(OverloadReturnTy, OverloadParams, EPI); + QualType OverloadTy = Context.getFunctionType(FT->getReturnType(), + OverloadParams, EPI); DeclContext *Parent = FDecl->getParent(); FunctionDecl *OverloadDecl = FunctionDecl::Create( Context, Parent, FDecl->getLocation(), FDecl->getLocation(), diff --git a/clang/test/CodeGenOpenCL/builtins-alloca.cl b/clang/test/CodeGenOpenCL/builtins-alloca.cl index 2df6bf3dba6a3..02f5e0de8d1b0 100644 --- a/clang/test/CodeGenOpenCL/builtins-alloca.cl +++ 
b/clang/test/CodeGenOpenCL/builtins-alloca.cl @@ -1,255 +1,146 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 \ -// RUN: -emit-llvm -o - | FileCheck --check-prefix=OPENCL12 %s +// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL12 %s // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 \ -// RUN: -emit-llvm -o - | FileCheck --check-prefix=OPENCL20 %s +// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL20 %s // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 \ -// RUN: -emit-llvm -o - | FileCheck --check-prefix=OPENCL30 %s +// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL30 %s // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space \ -// RUN: -emit-llvm -o - | FileCheck --check-prefix=OPENCL30-EXT %s +// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL30-EXT %s -// OPENCL12-LABEL: define dso_local void @test1( -// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { -// OPENCL12-NEXT: [[ENTRY:.*:]] -// OPENCL12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// OPENCL12-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL12-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL12-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL12-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL12-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL12-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL12-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 -// OPENCL12-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 -// OPENCL12-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) -// OPENCL12-NEXT: 
store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 -// OPENCL12-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL12-NEXT: [[CONV1:%.*]] = zext i32 [[TMP2]] to i64 -// OPENCL12-NEXT: [[MUL2:%.*]] = mul i64 [[CONV1]], 4 -// OPENCL12-NEXT: [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5) -// OPENCL12-NEXT: store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 -// OPENCL12-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL12-NEXT: [[CONV3:%.*]] = zext i32 [[TMP4]] to i64 -// OPENCL12-NEXT: [[MUL4:%.*]] = mul i64 [[CONV3]], 4 -// OPENCL12-NEXT: [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5) -// OPENCL12-NEXT: store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 -// OPENCL12-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL12-NEXT: [[CONV5:%.*]] = zext i32 [[TMP6]] to i64 -// OPENCL12-NEXT: [[MUL6:%.*]] = mul i64 [[CONV5]], 4 -// OPENCL12-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5) -// OPENCL12-NEXT: store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 -// OPENCL12-NEXT: ret void +// OPENCL-LABEL: define dso_local void @test1_builtin_alloca( +// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { +// OPENCL-NEXT: [[ENTRY:.*:]] +// OPENCL-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) +// OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL-NEXT: 
ret void // -// OPENCL20-LABEL: define dso_local void @test1( -// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { -// OPENCL20-NEXT: [[ENTRY:.*:]] -// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// OPENCL20-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL20-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL20-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 -// OPENCL20-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 -// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) -// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 -// OPENCL20-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL20-NEXT: [[CONV1:%.*]] = zext i32 [[TMP2]] to i64 -// OPENCL20-NEXT: [[MUL2:%.*]] = mul i64 [[CONV1]], 4 -// OPENCL20-NEXT: [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5) -// OPENCL20-NEXT: store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 -// OPENCL20-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL20-NEXT: [[CONV3:%.*]] = zext i32 [[TMP4]] to i64 -// OPENCL20-NEXT: [[MUL4:%.*]] = mul i64 [[CONV3]], 4 -// OPENCL20-NEXT: [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5) -// OPENCL20-NEXT: store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 -// OPENCL20-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL20-NEXT: [[CONV5:%.*]] = zext i32 [[TMP6]] to i64 -// OPENCL20-NEXT: [[MUL6:%.*]] = mul i64 
[[CONV5]], 4 -// OPENCL20-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5) -// OPENCL20-NEXT: store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 -// OPENCL20-NEXT: ret void -// -// OPENCL30-LABEL: define dso_local void @test1( -// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { -// OPENCL30-NEXT: [[ENTRY:.*:]] -// OPENCL30-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// OPENCL30-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 -// OPENCL30-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 -// OPENCL30-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) -// OPENCL30-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 -// OPENCL30-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-NEXT: [[CONV1:%.*]] = zext i32 [[TMP2]] to i64 -// OPENCL30-NEXT: [[MUL2:%.*]] = mul i64 [[CONV1]], 4 -// OPENCL30-NEXT: [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5) -// OPENCL30-NEXT: store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 -// OPENCL30-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-NEXT: [[CONV3:%.*]] = zext i32 [[TMP4]] to i64 -// OPENCL30-NEXT: [[MUL4:%.*]] = mul i64 [[CONV3]], 4 -// OPENCL30-NEXT: [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5) -// OPENCL30-NEXT: store ptr addrspace(5) [[TMP5]], ptr 
addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 -// OPENCL30-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-NEXT: [[CONV5:%.*]] = zext i32 [[TMP6]] to i64 -// OPENCL30-NEXT: [[MUL6:%.*]] = mul i64 [[CONV5]], 4 -// OPENCL30-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5) -// OPENCL30-NEXT: store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 -// OPENCL30-NEXT: ret void -// -// OPENCL30-EXT-LABEL: define dso_local void @test1( -// OPENCL30-EXT-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { -// OPENCL30-EXT-NEXT: [[ENTRY:.*:]] -// OPENCL30-EXT-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// OPENCL30-EXT-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-EXT-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-EXT-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-EXT-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-EXT-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-EXT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-EXT-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 -// OPENCL30-EXT-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 -// OPENCL30-EXT-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) -// OPENCL30-EXT-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 -// OPENCL30-EXT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-EXT-NEXT: [[CONV1:%.*]] = zext i32 [[TMP2]] to i64 -// OPENCL30-EXT-NEXT: [[MUL2:%.*]] = mul i64 [[CONV1]], 4 -// OPENCL30-EXT-NEXT: [[TMP3:%.*]] = alloca i8, i64 [[MUL2]], align 8, addrspace(5) -// OPENCL30-EXT-NEXT: store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 -// OPENCL30-EXT-NEXT: [[TMP4:%.*]] = 
load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-EXT-NEXT: [[CONV3:%.*]] = zext i32 [[TMP4]] to i64 -// OPENCL30-EXT-NEXT: [[MUL4:%.*]] = mul i64 [[CONV3]], 4 -// OPENCL30-EXT-NEXT: [[TMP5:%.*]] = alloca i8, i64 [[MUL4]], align 1, addrspace(5) -// OPENCL30-EXT-NEXT: store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 -// OPENCL30-EXT-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-EXT-NEXT: [[CONV5:%.*]] = zext i32 [[TMP6]] to i64 -// OPENCL30-EXT-NEXT: [[MUL6:%.*]] = mul i64 [[CONV5]], 4 -// OPENCL30-EXT-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[MUL6]], align 1, addrspace(5) -// OPENCL30-EXT-NEXT: store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 -// OPENCL30-EXT-NEXT: ret void -// -void test1(unsigned n) { +void test1_builtin_alloca(unsigned n) { __private float* alloc_ptr = (__private float*)__builtin_alloca(n*sizeof(int)); +} + +// OPENCL-LABEL: define dso_local void @test1_builtin_alloca_uninitialized( +// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL-NEXT: [[ENTRY:.*:]] +// OPENCL-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 8, addrspace(5) +// OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 +// OPENCL-NEXT: ret void +// +void test1_builtin_alloca_uninitialized(unsigned n) { __private float* alloc_ptr_uninitialized = (__private float*)__builtin_alloca_uninitialized(n*sizeof(int)); +} + +// OPENCL-LABEL: define dso_local void @test1_builtin_alloca_with_align( +// 
OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL-NEXT: [[ENTRY:.*:]] +// OPENCL-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5) +// OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 +// OPENCL-NEXT: ret void +// +void test1_builtin_alloca_with_align(unsigned n) { __private float* alloc_ptr_align = (__private float*)__builtin_alloca_with_align((n*sizeof(int)), 8); +} + +// OPENCL-LABEL: define dso_local void @test1_builtin_alloca_with_align_uninitialized( +// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL-NEXT: [[ENTRY:.*:]] +// OPENCL-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL-NEXT: [[MUL:%.*]] = mul i64 [[CONV]], 4 +// OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[MUL]], align 1, addrspace(5) +// OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 +// OPENCL-NEXT: ret void +// +void test1_builtin_alloca_with_align_uninitialized(unsigned n) { __private float* alloc_ptr_align_uninitialized = (__private float*)__builtin_alloca_with_align_uninitialized((n*sizeof(int)), 8); } -// OPENCL12-LABEL: define dso_local void @test2( -// OPENCL12-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { -// 
OPENCL12-NEXT: [[ENTRY:.*:]] -// OPENCL12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// OPENCL12-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL12-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL12-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL12-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL12-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL12-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL12-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 -// OPENCL12-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) -// OPENCL12-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 -// OPENCL12-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL12-NEXT: [[CONV1:%.*]] = zext i32 [[TMP2]] to i64 -// OPENCL12-NEXT: [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5) -// OPENCL12-NEXT: store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 -// OPENCL12-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL12-NEXT: [[CONV2:%.*]] = zext i32 [[TMP4]] to i64 -// OPENCL12-NEXT: [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5) -// OPENCL12-NEXT: store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 -// OPENCL12-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL12-NEXT: [[CONV3:%.*]] = zext i32 [[TMP6]] to i64 -// OPENCL12-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5) -// OPENCL12-NEXT: store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 -// OPENCL12-NEXT: ret void +// OPENCL-LABEL: define dso_local void @test2_builtin_alloca( +// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { 
+// OPENCL-NEXT: [[ENTRY:.*:]] +// OPENCL-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) +// OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 +// OPENCL-NEXT: ret void // -// OPENCL20-LABEL: define dso_local void @test2( -// OPENCL20-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { -// OPENCL20-NEXT: [[ENTRY:.*:]] -// OPENCL20-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// OPENCL20-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL20-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL20-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL20-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL20-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL20-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 -// OPENCL20-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) -// OPENCL20-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 -// OPENCL20-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL20-NEXT: [[CONV1:%.*]] = zext i32 [[TMP2]] to i64 -// OPENCL20-NEXT: [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5) -// OPENCL20-NEXT: store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 -// OPENCL20-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL20-NEXT: 
[[CONV2:%.*]] = zext i32 [[TMP4]] to i64 -// OPENCL20-NEXT: [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5) -// OPENCL20-NEXT: store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 -// OPENCL20-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL20-NEXT: [[CONV3:%.*]] = zext i32 [[TMP6]] to i64 -// OPENCL20-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5) -// OPENCL20-NEXT: store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 -// OPENCL20-NEXT: ret void +void test2_builtin_alloca(unsigned n) { + __private void *alloc_ptr = __builtin_alloca(n); +} + +// OPENCL-LABEL: define dso_local void @test2_builtin_alloca_uninitialized( +// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL-NEXT: [[ENTRY:.*:]] +// OPENCL-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) +// OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 +// OPENCL-NEXT: ret void // -// OPENCL30-LABEL: define dso_local void @test2( -// OPENCL30-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { -// OPENCL30-NEXT: [[ENTRY:.*:]] -// OPENCL30-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// OPENCL30-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), 
align 4, addrspace(5) -// OPENCL30-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 -// OPENCL30-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) -// OPENCL30-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 -// OPENCL30-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-NEXT: [[CONV1:%.*]] = zext i32 [[TMP2]] to i64 -// OPENCL30-NEXT: [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5) -// OPENCL30-NEXT: store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 -// OPENCL30-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-NEXT: [[CONV2:%.*]] = zext i32 [[TMP4]] to i64 -// OPENCL30-NEXT: [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5) -// OPENCL30-NEXT: store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 -// OPENCL30-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-NEXT: [[CONV3:%.*]] = zext i32 [[TMP6]] to i64 -// OPENCL30-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5) -// OPENCL30-NEXT: store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 -// OPENCL30-NEXT: ret void +void test2_builtin_alloca_uninitialized(unsigned n) { + __private void *alloc_ptr_uninitialized = __builtin_alloca_uninitialized(n); +} + +// OPENCL-LABEL: define dso_local void @test2_builtin_alloca_with_align( +// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL-NEXT: [[ENTRY:.*:]] +// OPENCL-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL-NEXT: [[TMP0:%.*]] = load i32, ptr 
addrspace(5) [[N_ADDR]], align 4 +// OPENCL-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5) +// OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], align 4 +// OPENCL-NEXT: ret void // -// OPENCL30-EXT-LABEL: define dso_local void @test2( -// OPENCL30-EXT-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { -// OPENCL30-EXT-NEXT: [[ENTRY:.*:]] -// OPENCL30-EXT-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// OPENCL30-EXT-NEXT: [[ALLOC_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-EXT-NEXT: [[ALLOC_PTR_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-EXT-NEXT: [[ALLOC_PTR_ALIGN:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-EXT-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) -// OPENCL30-EXT-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-EXT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-EXT-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 -// OPENCL30-EXT-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 8, addrspace(5) -// OPENCL30-EXT-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR]], align 4 -// OPENCL30-EXT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-EXT-NEXT: [[CONV1:%.*]] = zext i32 [[TMP2]] to i64 -// OPENCL30-EXT-NEXT: [[TMP3:%.*]] = alloca i8, i64 [[CONV1]], align 8, addrspace(5) -// OPENCL30-EXT-NEXT: store ptr addrspace(5) [[TMP3]], ptr addrspace(5) [[ALLOC_PTR_UNINITIALIZED]], align 4 -// OPENCL30-EXT-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-EXT-NEXT: [[CONV2:%.*]] = zext i32 [[TMP4]] to i64 -// OPENCL30-EXT-NEXT: [[TMP5:%.*]] = alloca i8, i64 [[CONV2]], align 1, addrspace(5) -// OPENCL30-EXT-NEXT: store ptr addrspace(5) [[TMP5]], ptr addrspace(5) [[ALLOC_PTR_ALIGN]], 
align 4 -// OPENCL30-EXT-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 -// OPENCL30-EXT-NEXT: [[CONV3:%.*]] = zext i32 [[TMP6]] to i64 -// OPENCL30-EXT-NEXT: [[TMP7:%.*]] = alloca i8, i64 [[CONV3]], align 1, addrspace(5) -// OPENCL30-EXT-NEXT: store ptr addrspace(5) [[TMP7]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 -// OPENCL30-EXT-NEXT: ret void +void test2_builtin_alloca_with_align(unsigned n) { + __private void *alloc_ptr_align = __builtin_alloca_with_align(n, 8); +} + +// OPENCL-LABEL: define dso_local void @test2_builtin_alloca_with_align_uninitialized( +// OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0]] { +// OPENCL-NEXT: [[ENTRY:.*:]] +// OPENCL-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// OPENCL-NEXT: [[ALLOC_PTR_ALIGN_UNINITIALIZED:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5) +// OPENCL-NEXT: store i32 [[N]], ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4 +// OPENCL-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// OPENCL-NEXT: [[TMP1:%.*]] = alloca i8, i64 [[CONV]], align 1, addrspace(5) +// OPENCL-NEXT: store ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[ALLOC_PTR_ALIGN_UNINITIALIZED]], align 4 +// OPENCL-NEXT: ret void // -void test2(unsigned n) { - __private void *alloc_ptr = __builtin_alloca(n); - __private void *alloc_ptr_uninitialized = __builtin_alloca_uninitialized(n); - __private void *alloc_ptr_align = __builtin_alloca_with_align(n, 8);; +void test2_builtin_alloca_with_align_uninitialized(unsigned n) { __private void *alloc_ptr_align_uninitialized = __builtin_alloca_with_align_uninitialized(n, 8); } +//// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +// OPENCL12: {{.*}} +// OPENCL20: {{.*}} +// OPENCL30: {{.*}} +// OPENCL30-EXT: {{.*}} >From 71cae5793de97795db51f748c0be9198bcb65602 Mon Sep 17 00:00:00 2001 From: vg0204 <vikash.gu...@amd.com> Date: Wed, 3 Jul 2024 15:15:17 +0530 Subject: [PATCH 4/9] Made some changes in comments in OpenCLBuiltinAllocaAddrSpace(). --- clang/lib/Sema/SemaChecking.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 0a354c9bc42f7..f6f02a50c87bc 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1490,9 +1490,7 @@ static bool OpenCLBuiltinAllocaAddrSpace(Sema &S, CallExpr *TheCall) { if (S.getLangOpts().OpenCL) { RT = RT->getPointeeType(); - // __builtin_alloca* should always return pointer to stack/private - // Address Space, while for other builtins with return pointer type, - // it should depend on the OpenCL version. + // Stack Address space corresponds to private address space. LangAS openCLStackAS = LangAS::opencl_private; RT = S.Context.getAddrSpaceQualType(RT, openCLStackAS); >From 6e510cc6c0bc2e3f1892a15b4d0e3d40c7f7bd77 Mon Sep 17 00:00:00 2001 From: vg0204 <vikash.gu...@amd.com> Date: Wed, 10 Jul 2024 11:23:47 +0530 Subject: [PATCH 5/9] Removed non-OpenCL specific code snippet from OpenCLBuiltinAllocaAddrSpace(). --- clang/lib/Sema/SemaChecking.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index f6f02a50c87bc..27bc68f0598c5 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1480,9 +1480,6 @@ static bool BuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall, // In OpenCL, __builtin_alloca_* should return a pointer to address space // that corresponds to the stack address space i.e private address space. 
static bool OpenCLBuiltinAllocaAddrSpace(Sema &S, CallExpr *TheCall) { - S.Diag(TheCall->getBeginLoc(), diag::warn_alloca) - << TheCall->getDirectCallee(); - QualType RT = TheCall->getType(); if (!RT->isPointerType() || RT->getPointeeType().hasAddressSpace()) return true; @@ -2231,6 +2228,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, [[fallthrough]]; case Builtin::BI__builtin_alloca: case Builtin::BI__builtin_alloca_uninitialized: + Diag(TheCall->getBeginLoc(), diag::warn_alloca) + << TheCall->getDirectCallee(); if (OpenCLBuiltinAllocaAddrSpace(*this, TheCall)) return ExprError(); break; >From 375813eee33e1c2e00c1466fb3279bfff5e14f73 Mon Sep 17 00:00:00 2001 From: vg0204 <vikash.gu...@amd.com> Date: Thu, 11 Jul 2024 13:56:13 +0530 Subject: [PATCH 6/9] Made suggested changes passing opencl_private directly. --- clang/lib/Sema/SemaChecking.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 27bc68f0598c5..31307c7fd73df 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1487,10 +1487,7 @@ static bool OpenCLBuiltinAllocaAddrSpace(Sema &S, CallExpr *TheCall) { if (S.getLangOpts().OpenCL) { RT = RT->getPointeeType(); - // Stack Address space corresponds to private address space. - LangAS openCLStackAS = LangAS::opencl_private; - - RT = S.Context.getAddrSpaceQualType(RT, openCLStackAS); + RT = S.Context.getAddrSpaceQualType(RT, LangAS::opencl_private); TheCall->setType(S.Context.getPointerType(RT)); } >From a77dfff98dfd3d29636b4b428ac9f4267ab53211 Mon Sep 17 00:00:00 2001 From: vg0204 <vikash.gu...@amd.com> Date: Mon, 22 Jul 2024 12:52:36 +0530 Subject: [PATCH 7/9] Made the suggested changes around the builtinAllocaAddrSpace function. 
--- clang/lib/Sema/SemaChecking.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 31307c7fd73df..62372e0c9874f 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1479,17 +1479,14 @@ static bool BuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall, // In OpenCL, __builtin_alloca_* should return a pointer to address space // that corresponds to the stack address space i.e private address space. -static bool OpenCLBuiltinAllocaAddrSpace(Sema &S, CallExpr *TheCall) { +static bool builtinAllocaAddrSpace(Sema &S, CallExpr *TheCall) { QualType RT = TheCall->getType(); - if (!RT->isPointerType() || RT->getPointeeType().hasAddressSpace()) - return true; - - if (S.getLangOpts().OpenCL) { - RT = RT->getPointeeType(); + assert((RT->isPointerType() && !(RT->getPointeeType().hasAddressSpace())) && + "__builtin_alloca has invalid address space"); - RT = S.Context.getAddrSpaceQualType(RT, LangAS::opencl_private); - TheCall->setType(S.Context.getPointerType(RT)); - } + RT = RT->getPointeeType(); + RT = S.Context.getAddrSpaceQualType(RT, LangAS::opencl_private); + TheCall->setType(S.Context.getPointerType(RT)); return false; } @@ -2227,8 +2224,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, case Builtin::BI__builtin_alloca_uninitialized: Diag(TheCall->getBeginLoc(), diag::warn_alloca) << TheCall->getDirectCallee(); - if (OpenCLBuiltinAllocaAddrSpace(*this, TheCall)) - return ExprError(); + if (getLangOpts().OpenCL) { + if (builtinAllocaAddrSpace(*this, TheCall)) + return ExprError(); + } break; case Builtin::BI__arithmetic_fence: if (BuiltinArithmeticFence(TheCall)) >From f500da93861d10a377dc998de387804eae6872fc Mon Sep 17 00:00:00 2001 From: vg0204 <vikash.gu...@amd.com> Date: Tue, 23 Jul 2024 11:47:54 +0530 Subject: [PATCH 8/9] Modified return type of builtinAllocaAddrSpace function.
--- clang/lib/Sema/SemaChecking.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 62372e0c9874f..f40900050aa10 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1479,7 +1479,7 @@ static bool BuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall, // In OpenCL, __builtin_alloca_* should return a pointer to address space // that corresponds to the stack address space i.e private address space. -static bool builtinAllocaAddrSpace(Sema &S, CallExpr *TheCall) { +static void builtinAllocaAddrSpace(Sema &S, CallExpr *TheCall) { QualType RT = TheCall->getType(); assert((RT->isPointerType() && !(RT->getPointeeType().hasAddressSpace())) && "__builtin_alloca has invalid address space"); @@ -1487,8 +1487,6 @@ static bool builtinAllocaAddrSpace(Sema &S, CallExpr *TheCall) { RT = RT->getPointeeType(); RT = S.Context.getAddrSpaceQualType(RT, LangAS::opencl_private); TheCall->setType(S.Context.getPointerType(RT)); - - return false; } namespace { @@ -2225,8 +2223,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, Diag(TheCall->getBeginLoc(), diag::warn_alloca) << TheCall->getDirectCallee(); if (getLangOpts().OpenCL) { - if (builtinAllocaAddrSpace(*this, TheCall)) - return ExprError(); + builtinAllocaAddrSpace(*this, TheCall); } break; case Builtin::BI__arithmetic_fence: >From 87c11538bc630d0245d8334f378b63d93ba18204 Mon Sep 17 00:00:00 2001 From: vg0204 <vikash.gu...@amd.com> Date: Fri, 26 Jul 2024 11:16:39 +0530 Subject: [PATCH 9/9] Removed unused version specific check-prefixes from builtins-alloca.cl test. 
--- clang/test/CodeGenOpenCL/builtins-alloca.cl | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/clang/test/CodeGenOpenCL/builtins-alloca.cl b/clang/test/CodeGenOpenCL/builtins-alloca.cl index 02f5e0de8d1b0..474e95e74e006 100644 --- a/clang/test/CodeGenOpenCL/builtins-alloca.cl +++ b/clang/test/CodeGenOpenCL/builtins-alloca.cl @@ -1,12 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 \ -// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL12 %s +// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL %s // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 \ -// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL20 %s +// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL %s // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 \ -// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL30 %s +// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL %s // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space \ -// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL,OPENCL30-EXT %s +// RUN: -emit-llvm -o - | FileCheck --check-prefixes=OPENCL %s // OPENCL-LABEL: define dso_local void @test1_builtin_alloca( // OPENCL-SAME: i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { @@ -139,8 +139,3 @@ void test2_builtin_alloca_with_align(unsigned n) { void test2_builtin_alloca_with_align_uninitialized(unsigned n) { __private void *alloc_ptr_align_uninitialized = __builtin_alloca_with_align_uninitialized(n, 8); } -//// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -// OPENCL12: {{.*}} -// OPENCL20: {{.*}} -// OPENCL30: {{.*}} -// OPENCL30-EXT: {{.*}} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits