https://github.com/changpeng updated https://github.com/llvm/llvm-project/pull/148991
>From 074800e1906bcce1cc0110c759a6d141ce4ea322 Mon Sep 17 00:00:00 2001 From: Changpeng Fang <changpeng.f...@amd.com> Date: Tue, 15 Jul 2025 16:37:20 -0700 Subject: [PATCH 1/2] AMDGPU: Implement builtins for gfx1250 wmma instructions Co-Authored-by: Stanislav Mekhanoshin <stanislav.mekhanos...@amd.com> Co-Authored-by: Shilei Tian <shilei.t...@amd.com> --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 40 ++ clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 199 +++++++- .../builtins-amdgcn-gfx1250-wmma-w32.cl | 433 ++++++++++++++++++ ...ins-amdgcn-error-gfx1250-wmma-w32-param.cl | 242 ++++++++++ 4 files changed, 913 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 71e4b3486167a..29e1e99bba9ef 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -676,5 +676,45 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts") +// GFX1250 WMMA builtins +TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x4_f32, "V8fIbV2fIbV2fIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x32_bf16, "V8fIbV16yIbV16yIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16_16x16x32_bf16, "V8yIbV16yIbV16yIsV8yIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16, "V8yIbV16yIbV16yIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8, "V8fV8iV8iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8, "V8fV8iV8iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8, "V8fV8iV8iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8, "V8fV8iV8iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8, "V8hV8iV8iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_i32_16x16x64_iu8, "V8iIbV8iIbV8iV8iIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8, "V8fV16iV16iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8, "V8fV16iV16iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8, "V8fV16iV16iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8, "V8fV16iV16iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x32_f16, "V8fIbV16hIbV16hIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x32_f16, "V8hIbV16hIbV16hIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_32x16x128_f4, "V16fV16iV8iIsV16f", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x64_bf16, "V8fIbV16yIbV32yV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16_16x16x64_bf16, "V8yIbV16yIbV32yV8yiIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16, "V8fIbV16yIbV32yV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8, "V8fV8iV16iV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8, "V8fV8iV16iV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8, "V8fV8iV16iV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8, "V8fV8iV16iV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8, "V8hV8iV16iV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_i32_16x16x128_iu8, "V8iIbV8iIbV16iV8iiIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x64_f16, "V8fIbV16hIbV32hV8fiIbIb", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_swmmac_f16_16x16x64_f16, "V8hIbV16hIbV32hV8hiIbIb", "nc", "gfx1250-insts,wavefrontsize32") + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 0d8c2ed284994..e1f9cbe7aea26 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -822,7 +822,46 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32: - case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: { + case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: + // GFX1250 WMMA builtins + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16: + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16: + case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8: + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8: + case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4: + case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16: + case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16: + case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16: + case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16: + case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16: + case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8: + case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8: + case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8: + case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8: + case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8: + case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8: + case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8: + case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8: + case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: { // These operations perform a matrix multiplication and accumulation of // the form: @@ -837,6 +876,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, // "false". bool AppendFalseForOpselArg = false; unsigned BuiltinWMMAOp; + // Need return type when D and C are of different types. + bool NeedReturnType = false; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32: @@ -975,6 +1016,160 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8; break; + // GFX1250 WMMA builtins + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32: + ArgsForMatchingMatrixTypes = {5, 1}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16: + ArgsForMatchingMatrixTypes = {5, 1}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16: + ArgsForMatchingMatrixTypes = {5, 1}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16: + ArgsForMatchingMatrixTypes = {5, 1}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16: + ArgsForMatchingMatrixTypes = {5, 1}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16: + NeedReturnType = true; + ArgsForMatchingMatrixTypes = {1, 5}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8: + ArgsForMatchingMatrixTypes = {3, 0}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8: + ArgsForMatchingMatrixTypes = {3, 0}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8: + ArgsForMatchingMatrixTypes = {3, 0}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8: + ArgsForMatchingMatrixTypes = {3, 0}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8: + ArgsForMatchingMatrixTypes = {3, 0}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8: + ArgsForMatchingMatrixTypes = {3, 0}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8: + ArgsForMatchingMatrixTypes = {3, 0}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8: + ArgsForMatchingMatrixTypes = {3, 0}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8: + ArgsForMatchingMatrixTypes = {3, 0}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8: + ArgsForMatchingMatrixTypes = {3, 0}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8: + ArgsForMatchingMatrixTypes = {3, 0}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8: + ArgsForMatchingMatrixTypes = {3, 0}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8: + ArgsForMatchingMatrixTypes = {3, 0}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8: + ArgsForMatchingMatrixTypes = {3, 0}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8: + ArgsForMatchingMatrixTypes = {3, 0}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8: + ArgsForMatchingMatrixTypes = {3, 0}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8: + ArgsForMatchingMatrixTypes = {4, 1}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8; + break; + case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4: + ArgsForMatchingMatrixTypes = {3, 0, 1}; + BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4; + break; + case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16: + ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; + BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16; + break; + case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16: + ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; + BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16; + break; + case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16: + ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; + BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16; + break; + case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16: + ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; + BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16; + break; + case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16: + ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; + BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16; + break; + case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8: + ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; + BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8; + break; + case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8: + ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; + BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8; + break; + case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8: + ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; + BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8; + break; + case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8: + ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; + BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8; + break; + case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8: + ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; + BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8; + break; + case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8: + ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; + BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8; + break; + case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8: + ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; + BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8; + break; + case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8: + ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; + BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8; + break; + case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: + ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; + BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8; + break; } SmallVector<Value *, 6> Args; @@ -984,6 +1179,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Args.push_back(Builder.getFalse()); SmallVector<llvm::Type *, 6> ArgTypes; + if (NeedReturnType) + ArgTypes.push_back(ConvertType(E->getType())); for (auto ArgIdx : ArgsForMatchingMatrixTypes) ArgTypes.push_back(Args[ArgIdx]->getType()); diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl new file mode 100644 index 0000000000000..e4ef3defdb341 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl @@ -0,0 +1,433 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -target-feature +wavefrontsize32 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1250 + +typedef float v16f __attribute__((ext_vector_type(16))); +typedef float v8f __attribute__((ext_vector_type(8))); +typedef float v2f __attribute__((ext_vector_type(2))); +typedef half v8h __attribute__((ext_vector_type(8))); +typedef half v16h __attribute__((ext_vector_type(16))); +typedef half v32h __attribute__((ext_vector_type(32))); +typedef __bf16 v32bf16 __attribute__((ext_vector_type(32))); +typedef __bf16 v16bf16 __attribute__((ext_vector_type(16))); +typedef __bf16 v8bf16 __attribute__((ext_vector_type(8))); +typedef int v16i __attribute__((ext_vector_type(16))); +typedef int v8i __attribute__((ext_vector_type(8))); + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x4_f32( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x4.f32.v8f32.v2f32(i1 false, <2 x float> [[A:%.*]], i1 false, <2 x float> [[B:%.*]], i16 0, <8 x float> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4:![0-9]+]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f32_16x16x4_f32(global v8f* out, v2f a, v2f b, v8f c) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x4_f32(0, a, 0, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x32_bf16( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x32.bf16.v8f32.v16bf16(i1 false, <16 x bfloat> [[A:%.*]], i1 false, <16 x bfloat> [[B:%.*]], i16 0, <8 x float> [[C:%.*]], i1 true, i1 false) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f32_16x16x32_bf16(global v8f* out, v16bf16 a, v16bf16 b, v8f c) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x32_bf16(0, a, 0, b, 0, c, true, false); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_bf16_16x16x32_bf16( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x bfloat> @llvm.amdgcn.wmma.bf16.16x16x32.bf16.v8bf16.v16bf16(i1 false, <16 x bfloat> [[A:%.*]], i1 false, <16 x bfloat> [[B:%.*]], i16 0, <8 x bfloat> [[C:%.*]], i1 false, i1 false) +// CHECK-GFX1250-NEXT: store <8 x bfloat> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_bf16_16x16x32_bf16(global v8bf16* out, v16bf16 a, v16bf16 b, v8bf16 c) +{ + *out = __builtin_amdgcn_wmma_bf16_16x16x32_bf16(0, a, 0, b, 0, c, false, false); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_bf16f32_16x16x32_bf16( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x bfloat> @llvm.amdgcn.wmma.bf16f32.16x16x32.bf16.v8bf16.v16bf16.v8f32(i1 false, <16 x bfloat> [[A:%.*]], i1 false, <16 x bfloat> [[B:%.*]], i16 0, <8 x float> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x bfloat> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_bf16f32_16x16x32_bf16(global v8bf16* out, v16bf16 a, v16bf16 b, v8f c) +{ + *out = __builtin_amdgcn_wmma_bf16f32_16x16x32_bf16(0, a, 0, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x64_fp8_fp8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x64.fp8.fp8.v8f32.v8i32(<8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], i16 0, <8 x float> [[C:%.*]], i1 true, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f32_16x16x64_fp8_fp8(global v8f* out, v8i a, v8i b, v8f c) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8(a, b, 0, c, true, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x64_fp8_bf8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x64.fp8.bf8.v8f32.v8i32(<8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], i16 0, <8 x float> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f32_16x16x64_fp8_bf8(global v8f* out, v8i a, v8i b, v8f c) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8(a, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x64_bf8_fp8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x64.bf8.fp8.v8f32.v8i32(<8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], i16 0, <8 x float> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f32_16x16x64_bf8_fp8(global v8f* out, v8i a, v8i b, v8f c) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8(a, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x64_bf8_bf8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x64.bf8.bf8.v8f32.v8i32(<8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], i16 0, <8 x float> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f32_16x16x64_bf8_bf8(global v8f* out, v8i a, v8i b, v8f c) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8(a, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f16_16x16x64_fp8_fp8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x64.fp8.fp8.v8f16.v8i32(<8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], i16 0, <8 x half> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f16_16x16x64_fp8_fp8(global v8h* out, v8i a, v8i b, v8h c) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8(a, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f16_16x16x64_fp8_bf8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x64.fp8.bf8.v8f16.v8i32(<8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], i16 0, <8 x half> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f16_16x16x64_fp8_bf8(global v8h* out, v8i a, v8i b, v8h c) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8(a, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f16_16x16x64_bf8_fp8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x64.bf8.fp8.v8f16.v8i32(<8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], i16 0, <8 x half> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f16_16x16x64_bf8_fp8(global v8h* out, v8i a, v8i b, v8h c) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8(a, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f16_16x16x64_bf8_bf8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x64.bf8.bf8.v8f16.v8i32(<8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], i16 0, <8 x half> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h* out, v8i a, v8i b, v8h c) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8(a, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_i32_16x16x64_iu8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 false, <8 x i32> [[A:%.*]], i1 false, <8 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c) +{ + *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x32_f16( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x32.f16.v8f32.v16f16(i1 false, <16 x half> [[A:%.*]], i1 false, <16 x half> [[B:%.*]], i16 0, <8 x float> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f32_16x16x32_f16(global v8f* out, v16h a, v16h b, v8f c) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x32_f16(0, a, 0, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f16_16x16x32_f16( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x32.f16.v8f16.v16f16(i1 false, <16 x half> [[A:%.*]], i1 false, <16 x half> [[B:%.*]], i16 0, <8 x half> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f16_16x16x32_f16(global v8h* out, v16h a, v16h b, v8h c) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x32_f16(0, a, 0, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f16_16x16x128_fp8_fp8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x128.fp8.fp8.v8f16.v16i32(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], i16 0, <8 x half> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f16_16x16x128_fp8_fp8(global v8h* out, v16i a, v16i b, v8h c) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8(a, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f16_16x16x128_fp8_bf8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x128.fp8.bf8.v8f16.v16i32(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], i16 0, <8 x half> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f16_16x16x128_fp8_bf8(global v8h* out, v16i a, v16i b, v8h c) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8(a, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f16_16x16x128_bf8_fp8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x128.bf8.fp8.v8f16.v16i32(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], i16 0, <8 x half> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f16_16x16x128_bf8_fp8(global v8h* out, v16i a, v16i b, v8h c) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8(a, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f16_16x16x128_bf8_bf8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.wmma.f16.16x16x128.bf8.bf8.v8f16.v16i32(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], i16 0, <8 x half> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f16_16x16x128_bf8_bf8(global v8h* out, v16i a, v16i b, v8h c) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8(a, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_fp8_fp8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x128.fp8.fp8.v8f32.v16i32(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], i16 0, <8 x float> [[C:%.*]], i1 true, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f32_16x16x128_fp8_fp8(global v8f* out, v16i a, v16i b, v8f c) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8(a, b, 0, c, true, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_fp8_bf8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x128.fp8.bf8.v8f32.v16i32(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], i16 0, <8 x float> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f32_16x16x128_fp8_bf8(global v8f* out, v16i a, v16i b, v8f c) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8(a, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_bf8_fp8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x128.bf8.fp8.v8f32.v16i32(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], i16 0, <8 x float> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f32_16x16x128_bf8_fp8(global v8f* out, v16i a, v16i b, v8f c) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8(a, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_bf8_bf8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x128.bf8.bf8.v8f32.v16i32(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], i16 0, <8 x float> [[C:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f32_16x16x128_bf8_bf8(global v8f* out, v16i a, v16i b, v8f c) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8(a, b, 0, c, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_wmma_f32_32x16x128_f4( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <16 x float> @llvm.amdgcn.wmma.f32.32x16x128.f4.v16f32.v16i32.v8i32(<16 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], i16 0, <16 x float> [[C:%.*]]) +// CHECK-GFX1250-NEXT: store <16 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 64, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_wmma_f32_wmma_f32_32x16x128_f4(global v16f* out, v16i a, v8i b, v16f c) +{ + *out = __builtin_amdgcn_wmma_f32_32x16x128_f4(a, b, 0, c); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_f32_16x16x64_bf16( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x64.bf16.v8f32.v16bf16.v32bf16.i32(i1 false, <16 x bfloat> [[A:%.*]], i1 false, <32 x bfloat> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_swmmac_f32_16x16x64_bf16(global v8f* out, v16bf16 a, v32bf16 b, v8f c, int index) +{ + *out = __builtin_amdgcn_swmmac_f32_16x16x64_bf16(0, a, 0, b, c, index, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_bf16_16x16x64_bf16( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x bfloat> @llvm.amdgcn.swmmac.bf16.16x16x64.bf16.v8bf16.v16bf16.v32bf16.i32(i1 false, <16 x bfloat> [[A:%.*]], i1 false, <32 x bfloat> [[B:%.*]], <8 x bfloat> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x bfloat> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_swmmac_bf16_16x16x64_bf16(global v8bf16* out, v16bf16 a, v32bf16 b, v8bf16 c, int index) +{ + *out = __builtin_amdgcn_swmmac_bf16_16x16x64_bf16(0, a, 0, b, c, index, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_bf16f32_16x16x64_bf16( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.bf16f32.16x16x64.bf16.v8f32.v16bf16.v32bf16.i32(i1 false, <16 x bfloat> [[A:%.*]], i1 false, <32 x bfloat> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_swmmac_bf16f32_16x16x64_bf16(global v8f* out, v16bf16 a, v32bf16 b, v8f c, int index) +{ + *out = __builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16(0, a, 0, b, c, index, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_f32_16x16x128_fp8_fp8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x128.fp8.fp8.v8f32.v8i32.v16i32.i32(<8 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_swmmac_f32_16x16x128_fp8_fp8(global v8f* out, v8i a, v16i b, v8f c, int index) +{ + *out = __builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8(a, b, c, index, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_f32_16x16x128_fp8_bf8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x128.fp8.bf8.v8f32.v8i32.v16i32.i32(<8 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_swmmac_f32_16x16x128_fp8_bf8(global v8f* out, v8i a, v16i b, v8f c, int index) +{ + *out = __builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8(a, b, c, index, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_f32_16x16x128_bf8_fp8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x128.bf8.fp8.v8f32.v8i32.v16i32.i32(<8 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_swmmac_f32_16x16x128_bf8_fp8(global v8f* out, v8i a, v16i b, v8f c, int index) +{ + *out = __builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8(a, b, c, index, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_f32_16x16x128_bf8_bf8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x128.bf8.bf8.v8f32.v8i32.v16i32.i32(<8 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_swmmac_f32_16x16x128_bf8_bf8(global v8f* out, v8i a, v16i b, v8f c, int index) +{ + *out = __builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8(a, b, c, index, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_f16_16x16x128_fp8_fp8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x128.fp8.fp8.v8f16.v8i32.v16i32.i32(<8 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <8 x half> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_swmmac_f16_16x16x128_fp8_fp8(global v8h* out, v8i a, v16i b, v8h c, int index) +{ + *out = __builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8(a, b, c, index, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_f16_16x16x128_fp8_bf8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x128.fp8.bf8.v8f16.v8i32.v16i32.i32(<8 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <8 x half> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_swmmac_f16_16x16x128_fp8_bf8(global v8h* out, v8i a, v16i b, v8h c, int index) +{ + *out = __builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8(a, b, c, index, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_f16_16x16x128_bf8_fp8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x128.bf8.fp8.v8f16.v8i32.v16i32.i32(<8 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <8 x half> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_swmmac_f16_16x16x128_bf8_fp8(global v8h* out, v8i a, v16i b, v8h c, int index) +{ + *out = __builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8(a, b, c, index, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_f16_16x16x128_bf8_bf8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x128.bf8.bf8.v8f16.v8i32.v16i32.i32(<8 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <8 x half> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_swmmac_f16_16x16x128_bf8_bf8(global v8h* out, v8i a, v16i b, v8h c, int index) +{ + *out = __builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8(a, b, c, index, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_i32_16x16x128_iu8( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x128.iu8.v8i32.v8i32.v16i32.i32(i1 false, <8 x i32> [[A:%.*]], i1 false, <16 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_swmmac_i32_16x16x128_iu8(global v8i* out, v8i a, v16i b, v8i c, int index) +{ + *out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_f32_16x16x64_f16( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.swmmac.f32.16x16x64.f16.v8f32.v16f16.v32f16.i32(i1 false, <16 x half> [[A:%.*]], i1 false, <32 x half> [[B:%.*]], <8 x float> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_swmmac_f32_16x16x64_f16(global v8f* out, v16h a, v32h b, v8f c, int index) +{ + *out = __builtin_amdgcn_swmmac_f32_16x16x64_f16(0, a, 0, b, c, index, false, true); +} + +// CHECK-GFX1250-LABEL: @test_amdgcn_swmmac_f16_16x16x64_f16( +// CHECK-GFX1250-NEXT: entry: +// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.amdgcn.swmmac.f16.16x16x64.f16.v8f16.v16f16.v32f16.i32(i1 false, <16 x half> [[A:%.*]], i1 false, <32 x half> [[B:%.*]], <8 x half> [[C:%.*]], i32 [[INDEX:%.*]], i1 false, i1 true) +// CHECK-GFX1250-NEXT: store <8 x half> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]] +// CHECK-GFX1250-NEXT: ret void +// +void test_amdgcn_swmmac_f16_16x16x64_f16(global v8h* out, v16h a, v32h b, v8h c, int index) +{ + *out = __builtin_amdgcn_swmmac_f16_16x16x64_f16(0, a, 0, b, c, index, false, true); +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl new file mode 100644 index 0000000000000..55d705e6ad238 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl @@ -0,0 +1,242 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -verify -emit-llvm -o - %s + +typedef float v16f __attribute__((ext_vector_type(16))); +typedef float v8f __attribute__((ext_vector_type(8))); +typedef float v2f __attribute__((ext_vector_type(2))); +typedef half v8h __attribute__((ext_vector_type(8))); +typedef half v16h __attribute__((ext_vector_type(16))); +typedef half v32h __attribute__((ext_vector_type(32))); +typedef __bf16 v32bf16 __attribute__((ext_vector_type(32))); +typedef __bf16 v16bf16 __attribute__((ext_vector_type(16))); +typedef __bf16 v8bf16 __attribute__((ext_vector_type(8))); +typedef int v16i __attribute__((ext_vector_type(16))); +typedef int v8i __attribute__((ext_vector_type(8))); + +void test_amdgcn_wmma_f32_16x16x4_f32(global v8f* out, v2f a, v2f b, v8f c, int mod) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x4_f32(mod, a, 0, b, 0, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x4_f32' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x4_f32(0, a, mod, b, 0, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x4_f32' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x4_f32(0, a, 0, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x4_f32' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x4_f32(0, a, 0, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x4_f32' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x4_f32(0, a, 0, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x4_f32' must be a constant integer}} +} + +void test_amdgcn_wmma_f32_16x16x32_bf16(global v8f* out, v16bf16 a, v16bf16 b, v8f c, int mod) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x32_bf16(mod, a, 0, b, 0, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x32_bf16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x32_bf16(0, a, mod, b, 0, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x32_bf16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x32_bf16(0, a, 0, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x32_bf16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x32_bf16(0, a, 0, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x32_bf16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x32_bf16(0, a, 0, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x32_bf16' must be a constant integer}} +} + +void test_amdgcn_wmma_bf16_16x16x32_bf16(global v8bf16* out, v16bf16 a, v16bf16 b, v8bf16 c, int mod) +{ + *out = __builtin_amdgcn_wmma_bf16_16x16x32_bf16(mod, a, 0, b, 0, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_bf16_16x16x32_bf16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_bf16_16x16x32_bf16(0, a, mod, b, 0, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_bf16_16x16x32_bf16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_bf16_16x16x32_bf16(0, a, 0, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_bf16_16x16x32_bf16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_bf16_16x16x32_bf16(0, a, 0, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_bf16_16x16x32_bf16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_bf16_16x16x32_bf16(0, a, 0, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_bf16_16x16x32_bf16' must be a constant integer}} +} + +void test_amdgcn_wmma_bf16f32_16x16x32_bf16(global v8bf16* out, v16bf16 a, v16bf16 b, v8f c, int mod) +{ + *out = __builtin_amdgcn_wmma_bf16f32_16x16x32_bf16(mod, a, 0, b, 0, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_bf16f32_16x16x32_bf16(0, a, mod, b, 0, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_bf16f32_16x16x32_bf16(0, a, 0, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_bf16f32_16x16x32_bf16(0, a, 0, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_bf16f32_16x16x32_bf16(0, a, 0, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16' must be a constant integer}} +} + +void test_amdgcn_wmma_f32_16x16x64_fp8_fp8(global v8f* out, v8i a, v8i b, v8f c, int mod) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8(a, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8(a, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8(a, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8' must be a constant integer}} +} + +void test_amdgcn_wmma_f32_16x16x64_fp8_bf8(global v8f* out, v8i a, v8i b, v8f c, int mod) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8(a, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8(a, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8(a, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8' must be a constant integer}} +} + +void test_amdgcn_wmma_f32_16x16x64_bf8_fp8(global v8f* out, v8i a, v8i b, v8f c, int mod) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8(a, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8(a, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8(a, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8' must be a constant integer}} +} + +void test_amdgcn_wmma_f32_16x16x64_bf8_bf8(global v8f* out, v8i a, v8i b, v8f c, int mod) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8(a, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8(a, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8(a, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8' must be a constant integer}} +} + +void test_amdgcn_wmma_f16_16x16x64_fp8_fp8(global v8h* out, v8i a, v8i b, v8h c, int mod) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8(a, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8(a, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8(a, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8' must be a constant integer}} +} + +void test_amdgcn_wmma_f16_16x16x64_fp8_bf8(global v8h* out, v8i a, v8i b, v8h c, int mod) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8(a, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8(a, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8(a, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8' must be a constant integer}} +} + +void test_amdgcn_wmma_f16_16x16x64_bf8_fp8(global v8h* out, v8i a, v8i b, v8h c, int mod) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8(a, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8(a, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8(a, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8' must be a constant integer}} +} + +void test_amdgcn_wmma_f16_16x16x64_bf8_bf8(global v8h* out, v8i a, v8i b, v8h c, int mod) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8(a, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8(a, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8(a, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8' must be a constant integer}} +} + +void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c, int mod) +{ + *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(mod, a, 0, b, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, mod, b, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant integer}} +} + +void test_amdgcn_wmma_f32_16x16x32_f16(global v8f* out, v16h a, v16h b, v8f c, int mod) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x32_f16(mod, a, 0, b, 0, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x32_f16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x32_f16(0, a, mod, b, 0, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x32_f16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x32_f16(0, a, 0, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x32_f16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x32_f16(0, a, 0, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x32_f16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x32_f16(0, a, 0, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x32_f16' must be a constant integer}} +} + +void test_amdgcn_wmma_f16_16x16x32_f16(global v8h* out, v16h a, v16h b, v8h c, int mod) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x32_f16(mod, a, 0, b, 0, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x32_f16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x32_f16(0, a, mod, b, 0, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x32_f16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x32_f16(0, a, 0, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x32_f16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x32_f16(0, a, 0, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x32_f16' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x32_f16(0, a, 0, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x32_f16' must be a constant integer}} +} + +void test_amdgcn_wmma_f16_16x16x128_fp8_fp8(global v8h* out, v16i a, v16i b, v8h c, int mod) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8(a, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8(a, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8(a, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8' must be a constant integer}} +} + +void test_amdgcn_wmma_f16_16x16x128_fp8_bf8(global v8h* out, v16i a, v16i b, v8h c, int mod) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8(a, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8(a, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8(a, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8' must be a constant integer}} +} + +void test_amdgcn_wmma_f16_16x16x128_bf8_fp8(global v8h* out, v16i a, v16i b, v8h c, int mod) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8(a, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8(a, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8(a, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8' must be a constant integer}} +} + +void test_amdgcn_wmma_f16_16x16x128_bf8_bf8(global v8h* out, v16i a, v16i b, v8h c, int mod) +{ + *out = __builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8(a, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8(a, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8(a, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8' must be a constant integer}} +} + +void test_amdgcn_wmma_f32_16x16x128_fp8_fp8(global v8f* out, v16i a, v16i b, v8f c, int mod) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8(a, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8(a, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8(a, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8' must be a constant integer}} +} + +void test_amdgcn_wmma_f32_16x16x128_fp8_bf8(global v8f* out, v16i a, v16i b, v8f c, int mod) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8(a, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8(a, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8(a, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8' must be a constant integer}} +} + +void test_amdgcn_wmma_f32_16x16x128_bf8_fp8(global v8f* out, v16i a, v16i b, v8f c, int mod) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8(a, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8(a, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8(a, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8' must be a constant integer}} +} + +void test_amdgcn_wmma_f32_16x16x128_bf8_bf8(global v8f* out, v16i a, v16i b, v8f c, int mod) +{ + *out = __builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8(a, b, mod, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8(a, b, 0, c, mod, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8' must be a constant integer}} + *out = __builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8(a, b, 0, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8' must be a constant integer}} +} + +void test_amdgcn_wmma_f32_wmma_f32_32x16x128_f4(global v16f* out, v16i a, v8i b, v16f c, int mod) +{ + *out = __builtin_amdgcn_wmma_f32_32x16x128_f4(a, b, mod, c); // expected-error {{'__builtin_amdgcn_wmma_f32_32x16x128_f4' must be a constant integer}} +} + +void test_amdgcn_swmmac_f32_16x16x64_bf16(global v8f* out, v16bf16 a, v32bf16 b, v8f c, int index, int mod) +{ + *out = __builtin_amdgcn_swmmac_f32_16x16x64_bf16(mod, a, 0, b, c, index, false, false); // expected-error {{'__builtin_amdgcn_swmmac_f32_16x16x64_bf16' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_f32_16x16x64_bf16(0, a, mod, b, c, index, false, false); // expected-error {{'__builtin_amdgcn_swmmac_f32_16x16x64_bf16' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_f32_16x16x64_bf16(0, a, 0, b, c, index, mod, false); // expected-error {{'__builtin_amdgcn_swmmac_f32_16x16x64_bf16' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_f32_16x16x64_bf16(0, a, 0, b, c, index, false, mod); // expected-error {{'__builtin_amdgcn_swmmac_f32_16x16x64_bf16' must be a constant integer}} +} + +void test_amdgcn_swmmac_bf16_16x16x64_bf16(global v8bf16* out, v16bf16 a, v32bf16 b, v8bf16 c, int index, int mod) +{ + *out = __builtin_amdgcn_swmmac_bf16_16x16x64_bf16(mod, a, 0, b, c, index, false, false); // expected-error {{'__builtin_amdgcn_swmmac_bf16_16x16x64_bf16' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_bf16_16x16x64_bf16(0, a, mod, b, c, index, false, false); // expected-error {{'__builtin_amdgcn_swmmac_bf16_16x16x64_bf16' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_bf16_16x16x64_bf16(0, a, 0, b, c, index, mod, false); // expected-error {{'__builtin_amdgcn_swmmac_bf16_16x16x64_bf16' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_bf16_16x16x64_bf16(0, a, 0, b, c, index, false, mod); // expected-error {{'__builtin_amdgcn_swmmac_bf16_16x16x64_bf16' must be a constant integer}} +} + +void test_amdgcn_swmmac_bf16f32_16x16x64_bf16(global v8f* out, v16bf16 a, v32bf16 b, v8f c, int index, int mod) +{ + *out = __builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16(mod, a, 0, b, c, index, false, false); // expected-error {{'__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16(0, a, mod, b, c, index, false, false); // expected-error {{'__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16(0, a, 0, b, c, index, mod, false); // expected-error {{'__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16(0, a, 0, b, c, index, false, mod); // expected-error {{'__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16' must be a constant integer}} +} + +void test_amdgcn_swmmac_i32_16x16x128_iu8(global v8i* out, v8i a, v16i b, v8i c, int index, int mod) +{ + *out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(mod, a, 0, b, c, index, false, false); // expected-error {{'__builtin_amdgcn_swmmac_i32_16x16x128_iu8' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, mod, b, c, index, false, false); // expected-error {{'__builtin_amdgcn_swmmac_i32_16x16x128_iu8' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, mod, false); // expected-error {{'__builtin_amdgcn_swmmac_i32_16x16x128_iu8' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a, 0, b, c, index, false, mod); // expected-error {{'__builtin_amdgcn_swmmac_i32_16x16x128_iu8' must be a constant integer}} +} + +void test_amdgcn_swmmac_f32_16x16x64_f16(global v8f* out, v16h a, v32h b, v8f c, int index, int mod) +{ + *out = __builtin_amdgcn_swmmac_f32_16x16x64_f16(mod, a, 0, b, c, index, false, false); // expected-error {{'__builtin_amdgcn_swmmac_f32_16x16x64_f16' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_f32_16x16x64_f16(0, a, mod, b, c, index, false, false); // expected-error {{'__builtin_amdgcn_swmmac_f32_16x16x64_f16' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_f32_16x16x64_f16(0, a, 0, b, c, index, mod, false); // expected-error {{'__builtin_amdgcn_swmmac_f32_16x16x64_f16' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_f32_16x16x64_f16(0, a, 0, b, c, index, false, mod); // expected-error {{'__builtin_amdgcn_swmmac_f32_16x16x64_f16' must be a constant integer}} +} + +void test_amdgcn_swmmac_f16_16x16x64_f16(global v8h* out, v16h a, v32h b, v8h c, int index, int mod) +{ + *out = __builtin_amdgcn_swmmac_f16_16x16x64_f16(mod, a, 0, b, c, index, false, false); // expected-error {{'__builtin_amdgcn_swmmac_f16_16x16x64_f16' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_f16_16x16x64_f16(0, a, mod, b, c, index, false, false); // expected-error {{'__builtin_amdgcn_swmmac_f16_16x16x64_f16' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_f16_16x16x64_f16(0, a, 0, b, c, index, mod, false); // expected-error {{'__builtin_amdgcn_swmmac_f16_16x16x64_f16' must be a constant integer}} + *out = __builtin_amdgcn_swmmac_f16_16x16x64_f16(0, a, 0, b, c, index, false, mod); // expected-error {{'__builtin_amdgcn_swmmac_f16_16x16x64_f16' must be a constant integer}} +} >From 16f76445dba1c7b362a1e06dd808e9874d1bac83 Mon Sep 17 00:00:00 2001 From: Changpeng Fang <changpeng.f...@amd.com> Date: Tue, 15 Jul 2025 16:50:00 -0700 Subject: [PATCH 2/2] AMDGPU: Implement builtins for gfx1250 wmma instructions Fix a format error --- clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index e1f9cbe7aea26..dcfdea648e93c 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -1090,7 +1090,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, ArgsForMatchingMatrixTypes = {3, 0}; BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8; break; - case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8: + case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8: ArgsForMatchingMatrixTypes = {3, 0}; BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8; break; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits