yaxunl created this revision.
Herald added a subscriber: tpr.
Currently Clang emits a call to __read_pipe_2 or __read_pipe_4 for the OpenCL
read_pipe builtin,
with appended type size and alignment arguments, where 2 or 4 indicates the
original
number of arguments.
For certain targets (e.g. amdgpu), there are optimized versions of
__read_pipe_2/__read_pipe_4
for the case where the type size and alignment have the same power-of-2 value.
It is desired that Clang
emit a different function for these cases.
This patch lets Clang emit __read_pipe_2_N for such cases, where N is the size
in bytes of
the type (N = 1, 2, 4, 8, ..., 128), so that the target runtime can use an
optimized version of
read_pipe.
The same applies to __read_pipe_4, __write_pipe_2 and __write_pipe_4.
This optimization is controlled by
TargetCodeGenInfo::hasOptimizedOpenCLPipeBuiltin, which returns
false by default. Each target can override this function to turn on this
optimization.
https://reviews.llvm.org/D36327
Files:
lib/CodeGen/CGBuiltin.cpp
lib/CodeGen/TargetInfo.cpp
lib/CodeGen/TargetInfo.h
test/CodeGenOpenCL/pipe_builtin.cl
Index: test/CodeGenOpenCL/pipe_builtin.cl
===================================================================
--- test/CodeGenOpenCL/pipe_builtin.cl
+++ test/CodeGenOpenCL/pipe_builtin.cl
@@ -1,73 +1,90 @@
-// RUN: %clang_cc1 -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=CL2.0 -o - %s | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=CL2.0 -o - %s | FileCheck -check-prefixes=CHECK,NAMD %s
+// RUN: %clang_cc1 -triple amdgcn---amdgizcl -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=CL2.0 -o - %s | FileCheck -check-prefixes=CHECK,AMD %s
// CHECK: %opencl.pipe_t = type opaque
// CHECK: %opencl.reserve_id_t = type opaque
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
+typedef struct {
+ int x[100];
+} S;
+
+typedef long long2 __attribute__((ext_vector_type(2)));
+typedef long long3 __attribute__((ext_vector_type(3)));
+typedef long long4 __attribute__((ext_vector_type(4)));
+typedef long long8 __attribute__((ext_vector_type(8)));
+typedef long long16 __attribute__((ext_vector_type(16)));
+
void test1(read_only pipe int p, global int *ptr) {
- // CHECK: call i32 @__read_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4)
+ // NAMD: call i32 @__read_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4)
+ // AMD: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}})
read_pipe(p, ptr);
- // CHECK: call %opencl.reserve_id_t* @__reserve_read_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+ // CHECK: call %opencl.reserve_id_t* @__reserve_read_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
reserve_id_t rid = reserve_read_pipe(p, 2);
- // CHECK: call i32 @__read_pipe_4(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4)
+ // NAMD: call i32 @__read_pipe_4(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4)
+ // AMD: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i32* %{{.*}})
read_pipe(p, rid, 2, ptr);
- // CHECK: call void @__commit_read_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+ // CHECK: call void @__commit_read_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, %opencl.reserve_id_t{{.*}}* %{{.*}}, i32 4, i32 4)
commit_read_pipe(p, rid);
}
void test2(write_only pipe int p, global int *ptr) {
- // CHECK: call i32 @__write_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4)
+ // NAMD: call i32 @__write_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4)
+ // AMD: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32* %{{.*}})
write_pipe(p, ptr);
- // CHECK: call %opencl.reserve_id_t* @__reserve_write_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+ // CHECK: call %opencl.reserve_id_t* @__reserve_write_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
reserve_id_t rid = reserve_write_pipe(p, 2);
- // CHECK: call i32 @__write_pipe_4(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4)
+ // NAMD: call i32 @__write_pipe_4(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4)
+ // AMD: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i32* %{{.*}})
write_pipe(p, rid, 2, ptr);
- // CHECK: call void @__commit_write_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+ // CHECK: call void @__commit_write_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
commit_write_pipe(p, rid);
}
void test3(read_only pipe int p, global int *ptr) {
- // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_read_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+ // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_read_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
reserve_id_t rid = work_group_reserve_read_pipe(p, 2);
- // CHECK: call void @__work_group_commit_read_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+ // CHECK: call void @__work_group_commit_read_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, %opencl.reserve_id_t{{.*}}* %{{.*}}, i32 4, i32 4)
work_group_commit_read_pipe(p, rid);
}
void test4(write_only pipe int p, global int *ptr) {
- // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_write_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+ // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_write_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
reserve_id_t rid = work_group_reserve_write_pipe(p, 2);
- // CHECK: call void @__work_group_commit_write_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+ // CHECK: call void @__work_group_commit_write_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, %opencl.reserve_id_t{{.*}}* %{{.*}}, i32 4, i32 4)
work_group_commit_write_pipe(p, rid);
}
void test5(read_only pipe int p, global int *ptr) {
- // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_read_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+ // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_read_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
reserve_id_t rid = sub_group_reserve_read_pipe(p, 2);
- // CHECK: call void @__sub_group_commit_read_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+ // CHECK: call void @__sub_group_commit_read_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, %opencl.reserve_id_t{{.*}}* %{{.*}}, i32 4, i32 4)
sub_group_commit_read_pipe(p, rid);
}
void test6(write_only pipe int p, global int *ptr) {
- // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_write_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+ // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_write_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
reserve_id_t rid = sub_group_reserve_write_pipe(p, 2);
- // CHECK: call void @__sub_group_commit_write_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+ // CHECK: call void @__sub_group_commit_write_pipe(%opencl.pipe_t{{.*}}* %{{.*}}, %opencl.reserve_id_t{{.*}}* %{{.*}}, i32 4, i32 4)
sub_group_commit_write_pipe(p, rid);
}
void test7(write_only pipe int p, global int *ptr) {
- // CHECK: call i32 @__get_pipe_num_packets(%opencl.pipe_t* %{{.*}}, i32 4, i32 4)
+ // CHECK: call i32 @__get_pipe_num_packets(%opencl.pipe_t{{.*}}* %{{.*}}, i32 4, i32 4)
*ptr = get_pipe_num_packets(p);
- // CHECK: call i32 @__get_pipe_max_packets(%opencl.pipe_t* %{{.*}}, i32 4, i32 4)
+ // CHECK: call i32 @__get_pipe_max_packets(%opencl.pipe_t{{.*}}* %{{.*}}, i32 4, i32 4)
*ptr = get_pipe_max_packets(p);
}
void test8(read_only pipe int r, write_only pipe int w, global int *ptr) {
// verify that return type is correctly casted to i1 value
- // CHECK: %[[R:[0-9]+]] = call i32 @__read_pipe_2
+ // NAMD: %[[R:[0-9]+]] = call i32 @__read_pipe_2
+ // AMD: %[[R:[0-9]+]] = call i32 @__read_pipe_2_4
// CHECK: icmp ne i32 %[[R]], 0
if (read_pipe(r, ptr)) *ptr = -1;
- // CHECK: %[[W:[0-9]+]] = call i32 @__write_pipe_2
+ // NAMD: %[[W:[0-9]+]] = call i32 @__write_pipe_2
+ // AMD: %[[W:[0-9]+]] = call i32 @__write_pipe_2_4
// CHECK: icmp ne i32 %[[W]], 0
if (write_pipe(w, ptr)) *ptr = -1;
// CHECK: %[[N:[0-9]+]] = call i32 @__get_pipe_num_packets
@@ -77,3 +94,33 @@
// CHECK: icmp ne i32 %[[M]], 0
if (get_pipe_max_packets(w)) *ptr = -1;
}
+
+// CHECK-LABEL: @test9
+void test9(read_only pipe char p1, global char *ptr1,
+ read_only pipe short p2, global short *ptr2,
+ read_only pipe int p4, global int *ptr4,
+ read_only pipe long p8, global long *ptr8,
+ read_only pipe long2 p16, global long2 *ptr16,
+ read_only pipe long4 p32, global long4 *ptr32,
+ read_only pipe long8 p64, global long8 *ptr64,
+ read_only pipe long16 p128, global long16 *ptr128,
+ read_only pipe S pu, global S *ptru) {
+ // AMD: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* {{.*}}, i8* %{{.*}})
+ read_pipe(p1, ptr1);
+ // AMD: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* {{.*}}, i16* %{{.*}})
+ read_pipe(p2, ptr2);
+ // AMD: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* {{.*}}, i32* %{{.*}})
+ read_pipe(p4, ptr4);
+ // AMD: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* {{.*}}, i64* %{{.*}})
+ read_pipe(p8, ptr8);
+ // AMD: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64>* %{{.*}})
+ read_pipe(p16, ptr16);
+ // AMD: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64>* %{{.*}})
+ read_pipe(p32, ptr32);
+ // AMD: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64>* %{{.*}})
+ read_pipe(p64, ptr64);
+ // AMD: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64>* %{{.*}})
+ read_pipe(p128, ptr128);
+ // AMD: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8* %{{.*}}, i32 400, i32 4)
+ read_pipe(pu, ptru);
+}
Index: lib/CodeGen/TargetInfo.h
===================================================================
--- lib/CodeGen/TargetInfo.h
+++ lib/CodeGen/TargetInfo.h
@@ -260,6 +260,10 @@
virtual llvm::Constant *
performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *V, unsigned SrcAddr,
unsigned DestAddr, llvm::Type *DestTy) const;
+
+ /// Whether the target support optimized read_pipe and write_pipe builtin
+ /// functions when type size and alignment is power of 2.
+ virtual bool hasOptimizedOpenCLPipeBuiltin() const { return false; }
};
} // namespace CodeGen
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -7430,6 +7430,7 @@
}
unsigned getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const override;
+ bool hasOptimizedOpenCLPipeBuiltin() const override { return true; }
};
}
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -2392,46 +2392,95 @@
CGOpenCLRuntime OpenCLRT(CGM);
Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
+ unsigned Size = cast<llvm::ConstantInt>(PacketSize)->getZExtValue();
+ unsigned Align = cast<llvm::ConstantInt>(PacketAlign)->getZExtValue();
+ bool Opt = Size == Align && isPowerOf2_32(Size) &&
+ getTargetHooks().hasOptimizedOpenCLPipeBuiltin();
// Type of the generic packet parameter.
unsigned GenericAS =
getContext().getTargetAddressSpace(LangAS::opencl_generic);
- llvm::Type *I8PTy = llvm::PointerType::get(
- llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
+ llvm::Type *PtrElemTy;
+ if (!Opt)
+ PtrElemTy = llvm::Type::getInt8Ty(getLLVMContext());
+ else if (Size <= 8)
+ PtrElemTy = llvm::Type::getIntNTy(getLLVMContext(), Size * 8);
+ else
+ PtrElemTy = llvm::VectorType::get(
+ llvm::Type::getInt64Ty(getLLVMContext()), Size / 8);
+ llvm::Type *PtrTy = llvm::PointerType::get(PtrElemTy, GenericAS);
// Testing which overloaded version we should generate the call for.
if (2U == E->getNumArgs()) {
- const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
+ std::string Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
: "__write_pipe_2";
+ llvm::SmallVector<llvm::Type *, 4> ArgTys;
+ ArgTys.push_back(Arg0->getType());
+ ArgTys.push_back(PtrTy);
+
+ if (Opt) {
+ Name = Name + "_" + std::to_string(Size);
+ } else {
+ ArgTys.push_back(Int32Ty);
+ ArgTys.push_back(Int32Ty);
+ }
+
// Creating a generic function type to be able to call with any builtin or
// user defined type.
- llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
llvm::FunctionType *FTy = llvm::FunctionType::get(
Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
+ Value *BCast = Builder.CreatePointerCast(Arg1, PtrTy);
+
+ llvm::SmallVector<llvm::Value *, 4> Args;
+ Args.push_back(Arg0);
+ Args.push_back(BCast);
+ if (!Opt) {
+ Args.push_back(PacketSize);
+ Args.push_back(PacketAlign);
+ }
+
return RValue::get(
- Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
- {Arg0, BCast, PacketSize, PacketAlign}));
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
} else {
assert(4 == E->getNumArgs() &&
"Illegal number of parameters to pipe function");
- const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
+ std::string Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
: "__write_pipe_4";
+ llvm::SmallVector<llvm::Type *, 6> ArgTys;
+ ArgTys.push_back(Arg0->getType());
+ ArgTys.push_back(Arg1->getType());
+ ArgTys.push_back(Int32Ty);
+ ArgTys.push_back(PtrTy);
+
+ if (Opt) {
+ Name = Name + "_" + std::to_string(Size);
+ } else {
+ ArgTys.push_back(Int32Ty);
+ ArgTys.push_back(Int32Ty);
+ }
- llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
- Int32Ty, Int32Ty};
Value *Arg2 = EmitScalarExpr(E->getArg(2)),
*Arg3 = EmitScalarExpr(E->getArg(3));
llvm::FunctionType *FTy = llvm::FunctionType::get(
Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
+ Value *BCast = Builder.CreatePointerCast(Arg3, PtrTy);
// We know the third argument is an integer type, but we may need to cast
// it to i32.
if (Arg2->getType() != Int32Ty)
Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
- return RValue::get(Builder.CreateCall(
- CGM.CreateRuntimeFunction(FTy, Name),
- {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
+
+ llvm::SmallVector<llvm::Value *, 6> Args;
+ Args.push_back(Arg0);
+ Args.push_back(Arg1);
+ Args.push_back(Arg2);
+ Args.push_back(BCast);
+ if (!Opt) {
+ Args.push_back(PacketSize);
+ Args.push_back(PacketAlign);
+ }
+
+ return RValue::get(
+ Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Args));
}
}
// OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits