arsenm created this revision. arsenm added reviewers: yaxunl, rampitec, b-sumner. Herald added subscribers: t-tye, tpr, dstuttard, nhaehnle, wdng, jvesely, kzhuravl.
https://reviews.llvm.org/D57349 Files: include/clang/Basic/BuiltinsAMDGPU.def lib/CodeGen/CGBuiltin.cpp test/CodeGenOpenCL/builtins-amdgcn.cl Index: test/CodeGenOpenCL/builtins-amdgcn.cl =================================================================== --- test/CodeGenOpenCL/builtins-amdgcn.cl +++ test/CodeGenOpenCL/builtins-amdgcn.cl @@ -536,6 +536,18 @@ *out = __builtin_amdgcn_s_getpc(); } +// CHECK-LABEL: @test_ds_append_lds( +// CHECK: call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %ptr, i1 false) +kernel void test_ds_append_lds(global int* out, local int* ptr) { + *out = __builtin_amdgcn_ds_append(ptr); +} + +// CHECK-LABEL: @test_ds_consume_lds( +// CHECK: call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %ptr, i1 false) +kernel void test_ds_consume_lds(global int* out, local int* ptr) { + *out = __builtin_amdgcn_ds_consume(ptr); +} + // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent } Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -12478,6 +12478,14 @@ case AMDGPU::BI__builtin_amdgcn_fmed3f: case AMDGPU::BI__builtin_amdgcn_fmed3h: return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); + case AMDGPU::BI__builtin_amdgcn_ds_append: + case AMDGPU::BI__builtin_amdgcn_ds_consume: { + Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ? + Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume; + Value *Src0 = EmitScalarExpr(E->getArg(0)); + Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); + return Builder.CreateCall(F, { Src0, Builder.getFalse() }); + } case AMDGPU::BI__builtin_amdgcn_read_exec: { CallInst *CI = cast<CallInst>( EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); Index: include/clang/Basic/BuiltinsAMDGPU.def =================================================================== --- include/clang/Basic/BuiltinsAMDGPU.def +++ include/clang/Basic/BuiltinsAMDGPU.def @@ -98,6 +98,8 @@ BUILTIN(__builtin_amdgcn_ds_faddf, "ff*3fIiIiIb", "n") BUILTIN(__builtin_amdgcn_ds_fminf, "ff*3fIiIiIb", "n") BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3fIiIiIb", "n") +BUILTIN(__builtin_amdgcn_ds_append, "ii*3", "n") +BUILTIN(__builtin_amdgcn_ds_consume, "ii*3", "n") //===----------------------------------------------------------------------===// // CI+ only builtins.
Index: test/CodeGenOpenCL/builtins-amdgcn.cl =================================================================== --- test/CodeGenOpenCL/builtins-amdgcn.cl +++ test/CodeGenOpenCL/builtins-amdgcn.cl @@ -536,6 +536,18 @@ *out = __builtin_amdgcn_s_getpc(); } +// CHECK-LABEL: @test_ds_append_lds( +// CHECK: call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %ptr, i1 false) +kernel void test_ds_append_lds(global int* out, local int* ptr) { + *out = __builtin_amdgcn_ds_append(ptr); +} + +// CHECK-LABEL: @test_ds_consume_lds( +// CHECK: call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %ptr, i1 false) +kernel void test_ds_consume_lds(global int* out, local int* ptr) { + *out = __builtin_amdgcn_ds_consume(ptr); +} + // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent } Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -12478,6 +12478,14 @@ case AMDGPU::BI__builtin_amdgcn_fmed3f: case AMDGPU::BI__builtin_amdgcn_fmed3h: return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3); + case AMDGPU::BI__builtin_amdgcn_ds_append: + case AMDGPU::BI__builtin_amdgcn_ds_consume: { + Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ? + Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume; + Value *Src0 = EmitScalarExpr(E->getArg(0)); + Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); + return Builder.CreateCall(F, { Src0, Builder.getFalse() }); + } case AMDGPU::BI__builtin_amdgcn_read_exec: { CallInst *CI = cast<CallInst>( EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); Index: include/clang/Basic/BuiltinsAMDGPU.def =================================================================== --- include/clang/Basic/BuiltinsAMDGPU.def +++ include/clang/Basic/BuiltinsAMDGPU.def @@ -98,6 +98,8 @@ BUILTIN(__builtin_amdgcn_ds_faddf, "ff*3fIiIiIb", "n") BUILTIN(__builtin_amdgcn_ds_fminf, "ff*3fIiIiIb", "n") BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3fIiIiIb", "n") +BUILTIN(__builtin_amdgcn_ds_append, "ii*3", "n") +BUILTIN(__builtin_amdgcn_ds_consume, "ii*3", "n") //===----------------------------------------------------------------------===// // CI+ only builtins.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits