================ @@ -0,0 +1,64 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5 +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCN-GFX900 %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCN-GFX1010 %s +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCNSPIRV %s + +// Test that, depending on triple and, if applicable, target-cpu, one of three +// things happens: +// 1) for gfx900 we emit an empty kernel (concrete target, lacks feature) +// 2) for gfx1010 we emit a call to trap (concrete target, has feature) +// 3) for AMDGCNSPIRV we emit llvm.amdgcn.has.gfx10-insts as a constant +// externally initialised bool global, and load from it to provide the +// condition to a br (abstract target) + +//. +// AMDGCNSPIRV: @llvm.amdgcn.has.gfx10-insts = external addrspace(1) externally_initialized constant i1 +//. 
+// AMDGCN-GFX900-LABEL: define dso_local void @foo( +// AMDGCN-GFX900-SAME: ) #[[ATTR0:[0-9]+]] { +// AMDGCN-GFX900-NEXT: [[ENTRY:.*:]] +// AMDGCN-GFX900-NEXT: ret void +// +// AMDGCN-GFX1010-LABEL: define dso_local void @foo( +// AMDGCN-GFX1010-SAME: ) #[[ATTR0:[0-9]+]] { +// AMDGCN-GFX1010-NEXT: [[ENTRY:.*:]] +// AMDGCN-GFX1010-NEXT: call void @llvm.trap() +// AMDGCN-GFX1010-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @foo( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i1, ptr addrspace(1) @llvm.amdgcn.has.gfx10-insts, align 1 +// AMDGCNSPIRV-NEXT: [[TOBOOL:%.*]] = icmp ne i1 [[TMP0]], false +// AMDGCNSPIRV-NEXT: br i1 [[TOBOOL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]] +// AMDGCNSPIRV: [[IF_THEN]]: +// AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.trap() +// AMDGCNSPIRV-NEXT: br label %[[IF_END]] +// AMDGCNSPIRV: [[IF_END]]: +// AMDGCNSPIRV-NEXT: ret void +// +void foo() { + if (__builtin_amdgcn_is_invocable(__builtin_amdgcn_permlanex16)) ---------------- jhuber6 wrote:
The issue with how the ROCm device libs do it is that certain builtins require specific target features in order to be used. They hack around this with `__attribute__((target))`. I just want to know that you can call a builtin that requires `+dpp` features without that. https://github.com/llvm/llvm-project/pull/134016 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits