[clang] [llvm] [AMDGPU][True16][MC][CodeGen] true16 for v_alignbyte_b32 (PR #119750)
@@ -1,10 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s declare i32 @llvm.amdgcn.alignbyte(i32, i32, i32) #0 -; GCN-LABEL: {{^}}v_alignbyte_b32: -; GCN: v_alignbyte_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}} define amdgpu_kernel void @v_alignbyte_b32(ptr addrspace(1) %out, i32 %src1, i32 %src2, i32 %src3) #1 { +; GCN-LABEL: v_alignbyte_b32: +; GCN: ; %bb.0: +; GCN-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0xb +; GCN-NEXT:s_load_dwordx2 s[4:5], s[4:5], 0x9 +; GCN-NEXT:s_mov_b32 s7, 0xf000 +; GCN-NEXT:s_mov_b32 s6, -1 +; GCN-NEXT:s_waitcnt lgkmcnt(0) +; GCN-NEXT:v_mov_b32_e32 v0, s1 +; GCN-NEXT:v_mov_b32_e32 v1, s2 +; GCN-NEXT:v_alignbyte_b32 v0, s0, v0, v1 +; GCN-NEXT:buffer_store_dword v0, off, s[4:7], 0 +; GCN-NEXT:s_endpgm +; +; GFX11-TRUE16-LABEL: v_alignbyte_b32: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT:s_clause 0x1 +; GFX11-TRUE16-NEXT:s_load_b128 s[0:3], s[4:5], 0x2c +; GFX11-TRUE16-NEXT:s_load_b64 s[4:5], s[4:5], 0x24 +; GFX11-TRUE16-NEXT:v_mov_b32_e32 v1, 0 +; GFX11-TRUE16-NEXT:s_waitcnt lgkmcnt(0) +; GFX11-TRUE16-NEXT:v_mov_b16_e32 v0.l, s2 +; GFX11-TRUE16-NEXT:s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT:v_alignbyte_b32 v0, s0, s1, v0.l broxigarchen wrote: Added the test. It seems there is still an additional `mov` being generated, and s2 is not folded into src2. I guess we might need to address the RA first? https://github.com/llvm/llvm-project/pull/119750 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC][CodeGen] true16 for v_alignbyte_b32 (PR #119750)
broxigarchen wrote: Hi @arsenm @kosarev can you help to review this PR? Thanks! https://github.com/llvm/llvm-project/pull/119750 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC][CodeGen] true16 for v_alignbyte_b32 (PR #125706)
https://github.com/broxigarchen created https://github.com/llvm/llvm-project/pull/125706 Support true16 format for v_alignbyte_b32 in MC and CodeGen >From 1e63e17cfe20f809045e7209a870b24bd15b5a91 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 12 Dec 2024 13:33:14 -0500 Subject: [PATCH 1/5] True16 for v_alignbyte_b32 in MC --- clang/lib/CodeGen/CGBuiltin.cpp | 8 llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td| 8 +++- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 11 +++-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s| 42 +-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 17 ++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s| 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 3 ++ .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 31 +++--- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 16 ++- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 36 +--- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 21 -- 15 files changed, 190 insertions(+), 43 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 11fa295dad9524..8a02eeb64fd22a 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19912,6 +19912,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_alignbyte: { +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); +llvm::Function *F = +CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); +return Builder.CreateCall(F, {Src0, Src1, Src2}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the 
builtin's out diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index eb7bde69994913..59a7480103a4f4 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2353,7 +2353,7 @@ def int_amdgcn_writelane : >; def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable] >; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index ce73e0ca361d9b..f567c528b76df7 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -212,7 +212,11 @@ defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGP defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, fshr>; -defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; +defm V_ALIGNBYTE_B32 : VOP3Inst_t16_with_profiles <"v_alignbyte_b32", + VOP3_Profile, + VOP3_Profile_True16, + VOP3_Profile_Fake16, + int_amdgcn_alignbyte>; // XXX - No FPException seems suspect but manual doesn't say it does let mayRaiseFPException = 0 in { @@ -1690,7 +1694,7 @@ defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32: VOP3_Realtriple_gfx11_gfx12<0x216>; -defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>; +defm V_ALIGNBYTE_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x217, "v_alignbyte_b32">; defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>; defm V_MIN3_F32: VOP3_Realtriple_gfx11<0x219>; defm V_MIN3_I32: VOP3_Realtriple_gfx11_gfx12<0x21a>; 
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index e55fbfc6e18c8c..857a1359b00d99 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -461,11 +461,11 @@ v_alignbyte_b32 v5, s1, v255, s3 v_alignbyte_b32 v5, s105, s105, s105 // GFX11: v_alignbyte_b32 v5, s105, s105, s105; encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] -v_alignbyte
[clang] [llvm] [AMDGPU][True16][MC][CodeGen] true16 for v_alignbyte_b32 (PR #125706)
broxigarchen wrote: Hi @Sisyph @kosarev @arsenm The previous PR for this patch, https://github.com/llvm/llvm-project/pull/119750, is stuck and the new commit cannot be displayed. I closed it and reopened it here. https://github.com/llvm/llvm-project/pull/125706 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC][CodeGen] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen closed https://github.com/llvm/llvm-project/pull/119750 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC][CodeGen] true16 for v_alignbyte_b32 (PR #125706)
broxigarchen wrote: > Same suggestion as in [#119750 > (comment)](https://github.com/llvm/llvm-project/pull/119750#discussion_r1941297212). I see. I think this is better done separately. https://github.com/llvm/llvm-project/pull/125706 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC][CodeGen] true16 for v_alignbyte_b32 (PR #125706)
https://github.com/broxigarchen closed https://github.com/llvm/llvm-project/pull/125706 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC][CodeGen] true16 for v_alignbyte_b32 (PR #119750)
@@ -3802,6 +3802,26 @@ def : FPMinCanonMaxPat, fmaximum_oneuse>; } +let True16Predicate = UseFakeTrue16Insts in +def : GCNPat < +(i32 (int_amdgcn_alignbyte (i32 (VOP3OpSelMods i32:$src0, i32:$src0_modifiers)), broxigarchen wrote: I tried this, but TableGen complained about the type of the src2 operand. https://github.com/llvm/llvm-project/pull/119750 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC][CodeGen] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/119750 >From 68ba9f46992b683dcff947e929e5050691acf0f1 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 12 Dec 2024 13:33:14 -0500 Subject: [PATCH 1/4] True16 for v_alignbyte_b32 in MC --- clang/lib/CodeGen/CGBuiltin.cpp | 8 llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td| 8 +++- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 11 +++-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s| 42 +-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 17 ++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s| 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 3 ++ .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 31 +++--- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 16 ++- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 36 +--- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 21 -- 15 files changed, 190 insertions(+), 43 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 7ec9d59bfed5cf..92e8ea9d23e12b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19792,6 +19792,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_alignbyte: { +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); +llvm::Function *F = +CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); +return Builder.CreateCall(F, {Src0, Src1, Src2}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out diff --git 
a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index f721d5267cd2a0..fbbf9f74cc9c63 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2353,7 +2353,7 @@ def int_amdgcn_writelane : >; def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable] >; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index c06c932a5375e8..a0d904ba9a3353 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -212,7 +212,11 @@ defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGP defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, fshr>; -defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; +defm V_ALIGNBYTE_B32 : VOP3Inst_t16_with_profiles <"v_alignbyte_b32", + VOP3_Profile, + VOP3_Profile_True16, + VOP3_Profile_Fake16, + int_amdgcn_alignbyte>; // XXX - No FPException seems suspect but manual doesn't say it does let mayRaiseFPException = 0 in { @@ -1690,7 +1694,7 @@ defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32: VOP3_Realtriple_gfx11_gfx12<0x216>; -defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>; +defm V_ALIGNBYTE_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x217, "v_alignbyte_b32">; defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>; defm V_MIN3_F32: VOP3_Realtriple_gfx11<0x219>; defm V_MIN3_I32: VOP3_Realtriple_gfx11_gfx12<0x21a>; diff --git 
a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index 40e3fbda47787a..c166cb58d5cb52 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -461,11 +461,11 @@ v_alignbyte_b32 v5, s1, v255, s3 v_alignbyte_b32 v5, s105, s105, s105 // GFX11: v_alignbyte_b32 v5, s105, s105, s105; encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] -v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 -// GFX11: v_alignbyte_b32 v5, vc
[clang] [llvm] [AMDGPU][True16][MC][CodeGen] true16 for v_alignbyte_b32 (PR #119750)
@@ -3802,6 +3802,26 @@ def : FPMinCanonMaxPat, fmaximum_oneuse>; } +let True16Predicate = UseFakeTrue16Insts in +def : GCNPat < +(i32 (int_amdgcn_alignbyte (i32 (VOP3OpSelMods i32:$src0, i32:$src0_modifiers)), + (i32 (VOP3OpSelMods i32:$src1, i32:$src1_modifiers)), + (i32 (VOP3OpSelMods i32:$src2, i32:$src2_modifiers, +(V_ALIGNBYTE_B32_fake16_e64 i32:$src0_modifiers, VSrc_b32:$src0, +i32:$src1_modifiers, VSrc_b32:$src1, +i32:$src2_modifiers, VGPR_32:$src2) +>; + +let True16Predicate = UseRealTrue16Insts in broxigarchen wrote: moved to VOP3Instructions.td https://github.com/llvm/llvm-project/pull/119750 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/119750 >From dc1cc19d8d3cb2c41ca05a131f67bb576effb614 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 12 Dec 2024 13:33:14 -0500 Subject: [PATCH 1/2] True16 for v_alignbyte_b32 in MC --- clang/lib/CodeGen/CGBuiltin.cpp | 8 llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td| 8 +++- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 11 +++-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s| 42 +-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 17 ++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s| 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 3 ++ .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 31 +++--- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 16 ++- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 36 +--- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 21 -- 15 files changed, 190 insertions(+), 43 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c2e983eebebc10..2c359f67680e3f 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19567,6 +19567,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_alignbyte: { +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); +llvm::Function *F = +CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); +return Builder.CreateCall(F, {Src0, Src1, Src2}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out diff --git 
a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 92418b9104ad14..a3f2d3df3f5276 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2354,7 +2354,7 @@ def int_amdgcn_writelane : >; def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable] >; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 8a9f8aa3d16d3a..804a15c94d4728 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -212,7 +212,11 @@ defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGP defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, fshr>; -defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; +defm V_ALIGNBYTE_B32 : VOP3Inst_t16_with_profiles <"v_alignbyte_b32", + VOP3_Profile, + VOP3_Profile_True16, + VOP3_Profile_Fake16, + int_amdgcn_alignbyte>; // XXX - No FPException seems suspect but manual doesn't say it does let mayRaiseFPException = 0 in { @@ -1676,7 +1680,7 @@ defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32: VOP3_Realtriple_gfx11_gfx12<0x216>; -defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>; +defm V_ALIGNBYTE_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x217, "v_alignbyte_b32">; defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>; defm V_MIN3_F32: VOP3_Realtriple_gfx11<0x219>; defm V_MIN3_I32: VOP3_Realtriple_gfx11_gfx12<0x21a>; diff --git 
a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index b649bab532f262..94cd631590ae97 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -461,11 +461,11 @@ v_alignbyte_b32 v5, s1, v255, s3 v_alignbyte_b32 v5, s105, s105, s105 // GFX11: v_alignbyte_b32 v5, s105, s105, s105; encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] -v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 -// GFX11: v_alignbyte_b32 v5, vc
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen edited https://github.com/llvm/llvm-project/pull/119750 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen edited https://github.com/llvm/llvm-project/pull/119750 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen edited https://github.com/llvm/llvm-project/pull/119750 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/119750 >From 826cc5ccb8c4fb0d4edd53823725ba497ce86949 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 12 Dec 2024 13:33:14 -0500 Subject: [PATCH 1/2] True16 for v_alignbyte_b32 in MC --- clang/lib/CodeGen/CGBuiltin.cpp | 8 llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td| 8 +++- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 11 +++-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s| 42 +-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 17 ++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s| 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 3 ++ .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 31 +++--- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 16 ++- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 36 +--- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 21 -- 15 files changed, 190 insertions(+), 43 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 4d4b7428abd505..1b6437b44e07be 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19593,6 +19593,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_alignbyte: { +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); +llvm::Function *F = +CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); +return Builder.CreateCall(F, {Src0, Src1, Src2}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out diff --git 
a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 92418b9104ad14..a3f2d3df3f5276 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2354,7 +2354,7 @@ def int_amdgcn_writelane : >; def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable] >; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 34f90b33bc4ba4..ba5da9000879ae 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -212,7 +212,11 @@ defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGP defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, fshr>; -defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; +defm V_ALIGNBYTE_B32 : VOP3Inst_t16_with_profiles <"v_alignbyte_b32", + VOP3_Profile, + VOP3_Profile_True16, + VOP3_Profile_Fake16, + int_amdgcn_alignbyte>; // XXX - No FPException seems suspect but manual doesn't say it does let mayRaiseFPException = 0 in { @@ -1679,7 +1683,7 @@ defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32: VOP3_Realtriple_gfx11_gfx12<0x216>; -defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>; +defm V_ALIGNBYTE_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x217, "v_alignbyte_b32">; defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>; defm V_MIN3_F32: VOP3_Realtriple_gfx11<0x219>; defm V_MIN3_I32: VOP3_Realtriple_gfx11_gfx12<0x21a>; diff --git 
a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index d46f010a2dafbd..1dbdb9ed5b6fda 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -461,11 +461,11 @@ v_alignbyte_b32 v5, s1, v255, s3 v_alignbyte_b32 v5, s105, s105, s105 // GFX11: v_alignbyte_b32 v5, s105, s105, s105; encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] -v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 -// GFX11: v_alignbyte_b32 v5, vc
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/119750 >From dc1cc19d8d3cb2c41ca05a131f67bb576effb614 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 12 Dec 2024 13:33:14 -0500 Subject: [PATCH 1/2] True16 for v_alignbyte_b32 in MC --- clang/lib/CodeGen/CGBuiltin.cpp | 8 llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td| 8 +++- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 11 +++-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s| 42 +-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 17 ++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s| 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 3 ++ .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 31 +++--- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 16 ++- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 36 +--- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 21 -- 15 files changed, 190 insertions(+), 43 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c2e983eebebc10..2c359f67680e3f 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19567,6 +19567,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_alignbyte: { +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); +llvm::Function *F = +CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); +return Builder.CreateCall(F, {Src0, Src1, Src2}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out diff --git 
a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 92418b9104ad14..a3f2d3df3f5276 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2354,7 +2354,7 @@ def int_amdgcn_writelane : >; def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable] >; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 8a9f8aa3d16d3a..804a15c94d4728 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -212,7 +212,11 @@ defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGP defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, fshr>; -defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; +defm V_ALIGNBYTE_B32 : VOP3Inst_t16_with_profiles <"v_alignbyte_b32", + VOP3_Profile, + VOP3_Profile_True16, + VOP3_Profile_Fake16, + int_amdgcn_alignbyte>; // XXX - No FPException seems suspect but manual doesn't say it does let mayRaiseFPException = 0 in { @@ -1676,7 +1680,7 @@ defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32: VOP3_Realtriple_gfx11_gfx12<0x216>; -defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>; +defm V_ALIGNBYTE_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x217, "v_alignbyte_b32">; defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>; defm V_MIN3_F32: VOP3_Realtriple_gfx11<0x219>; defm V_MIN3_I32: VOP3_Realtriple_gfx11_gfx12<0x21a>; diff --git 
a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index b649bab532f262..94cd631590ae97 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -461,11 +461,11 @@ v_alignbyte_b32 v5, s1, v255, s3 v_alignbyte_b32 v5, s105, s105, s105 // GFX11: v_alignbyte_b32 v5, s105, s105, s105; encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] -v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 -// GFX11: v_alignbyte_b32 v5, vc
[clang] [llvm] True16 for v_alignbyte_b32 in MC (PR #119750)
https://github.com/broxigarchen created https://github.com/llvm/llvm-project/pull/119750 None >From 63323d742b5eadc2849086ce991aa4c609336ea7 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 12 Dec 2024 13:33:14 -0500 Subject: [PATCH] True16 for v_alignbyte_b32 in MC --- clang/lib/CodeGen/CGBuiltin.cpp | 8 clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 2 +- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td| 8 +++- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 11 +++-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s| 42 +-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 17 ++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s| 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 3 ++ .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 31 +++--- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 16 ++- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 36 +--- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 21 -- 16 files changed, 192 insertions(+), 45 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c2e983eebebc10..839fad1e6a3d56 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19567,6 +19567,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_alignbyte: { + llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); +llvm::Function *F = +CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); +return Builder.CreateCall(F, {Src0, Src1, Src2}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the 
builtin's out diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index ded5f6b5ac4fd3..db816da45b8b35 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -734,7 +734,7 @@ kernel void test_alignbit(global uint* out, uint src0, uint src1, uint src2) { } // CHECK-LABEL: @test_alignbyte( -// CHECK: tail call{{.*}} i32 @llvm.amdgcn.alignbyte(i32 %src0, i32 %src1, i32 %src2) +// CHECK: tail call{{.*}} i32 @llvm.amdgcn.alignbyte.i32(i32 %src0, i32 %src1, i32 %src2) kernel void test_alignbyte(global uint* out, uint src0, uint src1, uint src2) { *out = __builtin_amdgcn_alignbyte(src0, src1, src2); } diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 92418b9104ad14..b07e90a83e8613 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2353,8 +2353,8 @@ def int_amdgcn_writelane : [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; -def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], +def int_amdgcn_alignbyte : DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable] >; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 8a9f8aa3d16d3a..804a15c94d4728 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -212,7 +212,11 @@ defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGP defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, fshr>; -defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; +defm 
V_ALIGNBYTE_B32 : VOP3Inst_t16_with_profiles <"v_alignbyte_b32", + VOP3_Profile, + VOP3_Profile_True16, + VOP3_Profile_Fake16, + int_amdgcn_alignbyte>; // XXX - No FPException seems suspect but manual doesn't say it does let mayRaiseFPException = 0 in { @@ -1676,7 +1680,7 @@ defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32: VOP3_Realtriple_gfx11_gfx1
[clang] [llvm] True16 for v_alignbyte_b32 in MC (PR #119750)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/119750 >From 605578fbee47034a01869f42220dbd63631a5c87 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 12 Dec 2024 13:33:14 -0500 Subject: [PATCH] True16 for v_alignbyte_b32 in MC --- clang/lib/CodeGen/CGBuiltin.cpp | 8 clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 2 +- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td| 8 +++- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 11 +++-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s| 42 +-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 17 ++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s| 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 3 ++ .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 31 +++--- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 16 ++- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 36 +--- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 21 -- 16 files changed, 192 insertions(+), 45 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c2e983eebebc10..2c359f67680e3f 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19567,6 +19567,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_alignbyte: { +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); +llvm::Function *F = +CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); +return Builder.CreateCall(F, {Src0, Src1, Src2}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index ded5f6b5ac4fd3..db816da45b8b35 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -734,7 +734,7 @@ kernel void test_alignbit(global uint* out, uint src0, uint src1, uint src2) { } // CHECK-LABEL: @test_alignbyte( -// CHECK: tail call{{.*}} i32 @llvm.amdgcn.alignbyte(i32 %src0, i32 %src1, i32 %src2) +// CHECK: tail call{{.*}} i32 @llvm.amdgcn.alignbyte.i32(i32 %src0, i32 %src1, i32 %src2) kernel void test_alignbyte(global uint* out, uint src0, uint src1, uint src2) { *out = __builtin_amdgcn_alignbyte(src0, src1, src2); } diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 92418b9104ad14..b07e90a83e8613 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2353,8 +2353,8 @@ def int_amdgcn_writelane : [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; -def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], +def int_amdgcn_alignbyte : DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable] >; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 8a9f8aa3d16d3a..804a15c94d4728 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -212,7 +212,11 @@ defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGP defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, fshr>; -defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; +defm V_ALIGNBYTE_B32 : 
VOP3Inst_t16_with_profiles <"v_alignbyte_b32", + VOP3_Profile, + VOP3_Profile_True16, + VOP3_Profile_Fake16, + int_amdgcn_alignbyte>; // XXX - No FPException seems suspect but manual doesn't say it does let mayRaiseFPException = 0 in { @@ -1676,7 +1680,7 @@ defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32: VOP3_Realtriple_gfx11_gfx12<0
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen edited https://github.com/llvm/llvm-project/pull/119750 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/119750 >From c0feab212b954a47ad7541a5eceb149c478b3341 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 12 Dec 2024 13:33:14 -0500 Subject: [PATCH] True16 for v_alignbyte_b32 in MC --- clang/lib/CodeGen/CGBuiltin.cpp | 8 clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 2 +- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 +- .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 8 ++-- llvm/lib/Target/AMDGPU/VOP3Instructions.td| 8 +++- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 11 +++-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s| 42 +-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 17 ++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s| 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 3 ++ .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 31 +++--- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 16 ++- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 36 +--- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 21 -- 17 files changed, 196 insertions(+), 49 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c2e983eebebc10..2c359f67680e3f 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19567,6 +19567,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_alignbyte: { +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); +llvm::Function *F = +CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); +return Builder.CreateCall(F, {Src0, Src1, Src2}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate 
from the intrinsics's struct return to the builtin's out diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index ded5f6b5ac4fd3..db816da45b8b35 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -734,7 +734,7 @@ kernel void test_alignbit(global uint* out, uint src0, uint src1, uint src2) { } // CHECK-LABEL: @test_alignbyte( -// CHECK: tail call{{.*}} i32 @llvm.amdgcn.alignbyte(i32 %src0, i32 %src1, i32 %src2) +// CHECK: tail call{{.*}} i32 @llvm.amdgcn.alignbyte.i32(i32 %src0, i32 %src1, i32 %src2) kernel void test_alignbyte(global uint* out, uint src0, uint src1, uint src2) { *out = __builtin_amdgcn_alignbyte(src0, src1, src2); } diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 92418b9104ad14..b07e90a83e8613 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2353,8 +2353,8 @@ def int_amdgcn_writelane : [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; -def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], +def int_amdgcn_alignbyte : DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable] >; diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index ecf03b14143ee3..639a0f72fa6152 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -2998,8 +2998,8 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) { switch (I.getOpcode()) { case AMDGPU::V_ADDC_U32_e32: case AMDGPU::V_ADDC_U32_dpp: -case AMDGPU::V_CNDMASK_B16_e32: -case AMDGPU::V_CNDMASK_B16_dpp: +case AMDGPU::V_CNDMASK_B16_fake16_e32: +case AMDGPU::V_CNDMASK_B16_fake16_dpp: case 
AMDGPU::V_CNDMASK_B32_e32: case AMDGPU::V_CNDMASK_B32_dpp: case AMDGPU::V_DIV_FMAS_F32_e64: @@ -3014,8 +3014,8 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) { HazardReg == AMDGPU::VCC_HI; case AMDGPU::V_ADDC_U32_e64: case AMDGPU::V_ADDC_U32_e64_dpp: -case AMDGPU::V_CNDMASK_B16_e64: -case AMDGPU::V_CNDMASK_B16_e64_dpp: +case AMDGPU::V_CNDMASK_B16_fake16_e64: +case AMDGPU::V_CNDMASK_B16_fake16_e64_dpp: case AMDGPU::V_CNDMASK_B32_e64: case AMDGPU::V_CNDMASK_B32_e64_dpp: case AMDGPU::V_SUBB_U32_e64: diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td ind
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/119750 >From bddf957c544de2095ada423134eb8dc9fdfe2702 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 12 Dec 2024 13:33:14 -0500 Subject: [PATCH] True16 for v_alignbyte_b32 in MC --- clang/lib/CodeGen/CGBuiltin.cpp | 8 clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 2 +- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td| 8 +++- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 11 +++-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s| 42 +-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 17 ++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s| 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 3 ++ .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 31 +++--- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 16 ++- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 36 +--- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 21 -- 16 files changed, 192 insertions(+), 45 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c2e983eebebc10..2c359f67680e3f 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19567,6 +19567,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_alignbyte: { +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); +llvm::Function *F = +CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); +return Builder.CreateCall(F, {Src0, Src1, Src2}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index ded5f6b5ac4fd3..db816da45b8b35 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -734,7 +734,7 @@ kernel void test_alignbit(global uint* out, uint src0, uint src1, uint src2) { } // CHECK-LABEL: @test_alignbyte( -// CHECK: tail call{{.*}} i32 @llvm.amdgcn.alignbyte(i32 %src0, i32 %src1, i32 %src2) +// CHECK: tail call{{.*}} i32 @llvm.amdgcn.alignbyte.i32(i32 %src0, i32 %src1, i32 %src2) kernel void test_alignbyte(global uint* out, uint src0, uint src1, uint src2) { *out = __builtin_amdgcn_alignbyte(src0, src1, src2); } diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 92418b9104ad14..b07e90a83e8613 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2353,8 +2353,8 @@ def int_amdgcn_writelane : [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; -def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], +def int_amdgcn_alignbyte : DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable] >; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 8a9f8aa3d16d3a..804a15c94d4728 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -212,7 +212,11 @@ defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGP defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, fshr>; -defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; +defm V_ALIGNBYTE_B32 : 
VOP3Inst_t16_with_profiles <"v_alignbyte_b32", + VOP3_Profile, + VOP3_Profile_True16, + VOP3_Profile_Fake16, + int_amdgcn_alignbyte>; // XXX - No FPException seems suspect but manual doesn't say it does let mayRaiseFPException = 0 in { @@ -1676,7 +1680,7 @@ defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32: VOP3_Realtriple_gfx11_gfx12<0
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/119750 >From dc1cc19d8d3cb2c41ca05a131f67bb576effb614 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 12 Dec 2024 13:33:14 -0500 Subject: [PATCH 1/2] True16 for v_alignbyte_b32 in MC --- clang/lib/CodeGen/CGBuiltin.cpp | 8 llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td| 8 +++- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 11 +++-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s| 42 +-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 17 ++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s| 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 3 ++ .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 31 +++--- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 16 ++- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 36 +--- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 21 -- 15 files changed, 190 insertions(+), 43 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c2e983eebebc10..2c359f67680e3f 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19567,6 +19567,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_alignbyte: { +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); +llvm::Function *F = +CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); +return Builder.CreateCall(F, {Src0, Src1, Src2}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out diff --git 
a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 92418b9104ad14..a3f2d3df3f5276 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2354,7 +2354,7 @@ def int_amdgcn_writelane : >; def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable] >; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 8a9f8aa3d16d3a..804a15c94d4728 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -212,7 +212,11 @@ defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGP defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, fshr>; -defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; +defm V_ALIGNBYTE_B32 : VOP3Inst_t16_with_profiles <"v_alignbyte_b32", + VOP3_Profile, + VOP3_Profile_True16, + VOP3_Profile_Fake16, + int_amdgcn_alignbyte>; // XXX - No FPException seems suspect but manual doesn't say it does let mayRaiseFPException = 0 in { @@ -1676,7 +1680,7 @@ defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32: VOP3_Realtriple_gfx11_gfx12<0x216>; -defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>; +defm V_ALIGNBYTE_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x217, "v_alignbyte_b32">; defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>; defm V_MIN3_F32: VOP3_Realtriple_gfx11<0x219>; defm V_MIN3_I32: VOP3_Realtriple_gfx11_gfx12<0x21a>; diff --git 
a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index b649bab532f262..94cd631590ae97 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -461,11 +461,11 @@ v_alignbyte_b32 v5, s1, v255, s3 v_alignbyte_b32 v5, s105, s105, s105 // GFX11: v_alignbyte_b32 v5, s105, s105, s105; encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] -v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 -// GFX11: v_alignbyte_b32 v5, vc
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/119750 >From de5e3b1233f6112c2f069dc9d7e02cb19283ee70 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 12 Dec 2024 13:33:14 -0500 Subject: [PATCH] True16 for v_alignbyte_b32 in MC --- clang/lib/CodeGen/CGBuiltin.cpp | 8 clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 2 +- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td| 8 +++- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 11 +++-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s| 42 +-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 17 ++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s| 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 3 ++ .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 31 +++--- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 16 ++- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 36 +--- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 21 -- 16 files changed, 191 insertions(+), 44 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c2e983eebebc10..2c359f67680e3f 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19567,6 +19567,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_alignbyte: { +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); +llvm::Function *F = +CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); +return Builder.CreateCall(F, {Src0, Src1, Src2}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index ded5f6b5ac4fd3..db816da45b8b35 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -734,7 +734,7 @@ kernel void test_alignbit(global uint* out, uint src0, uint src1, uint src2) { } // CHECK-LABEL: @test_alignbyte( -// CHECK: tail call{{.*}} i32 @llvm.amdgcn.alignbyte(i32 %src0, i32 %src1, i32 %src2) +// CHECK: tail call{{.*}} i32 @llvm.amdgcn.alignbyte.i32(i32 %src0, i32 %src1, i32 %src2) kernel void test_alignbyte(global uint* out, uint src0, uint src1, uint src2) { *out = __builtin_amdgcn_alignbyte(src0, src1, src2); } diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 92418b9104ad14..a3f2d3df3f5276 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2354,7 +2354,7 @@ def int_amdgcn_writelane : >; def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable] >; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 8a9f8aa3d16d3a..804a15c94d4728 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -212,7 +212,11 @@ defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGP defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, fshr>; -defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; +defm V_ALIGNBYTE_B32 : VOP3Inst_t16_with_profiles <"v_alignbyte_b32", + VOP3_Profile, + VOP3_Profile_True16, + 
VOP3_Profile_Fake16, + int_amdgcn_alignbyte>; // XXX - No FPException seems suspect but manual doesn't say it does let mayRaiseFPException = 0 in { @@ -1676,7 +1680,7 @@ defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32: VOP3_Realtriple_gfx11_gfx12<0x216>; -defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>; +defm V_ALIGNBYTE_B32 :
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/119750 >From dc1cc19d8d3cb2c41ca05a131f67bb576effb614 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 12 Dec 2024 13:33:14 -0500 Subject: [PATCH] True16 for v_alignbyte_b32 in MC --- clang/lib/CodeGen/CGBuiltin.cpp | 8 llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td| 8 +++- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 11 +++-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s| 42 +-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 17 ++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s| 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 3 ++ .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 31 +++--- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 16 ++- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 36 +--- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 21 -- 15 files changed, 190 insertions(+), 43 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c2e983eebebc10..2c359f67680e3f 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19567,6 +19567,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_alignbyte: { +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); +llvm::Function *F = +CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); +return Builder.CreateCall(F, {Src0, Src1, Src2}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out diff --git 
a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 92418b9104ad14..a3f2d3df3f5276 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2354,7 +2354,7 @@ def int_amdgcn_writelane : >; def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable] >; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 8a9f8aa3d16d3a..804a15c94d4728 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -212,7 +212,11 @@ defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGP defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, fshr>; -defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; +defm V_ALIGNBYTE_B32 : VOP3Inst_t16_with_profiles <"v_alignbyte_b32", + VOP3_Profile, + VOP3_Profile_True16, + VOP3_Profile_Fake16, + int_amdgcn_alignbyte>; // XXX - No FPException seems suspect but manual doesn't say it does let mayRaiseFPException = 0 in { @@ -1676,7 +1680,7 @@ defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32: VOP3_Realtriple_gfx11_gfx12<0x216>; -defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>; +defm V_ALIGNBYTE_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x217, "v_alignbyte_b32">; defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>; defm V_MIN3_F32: VOP3_Realtriple_gfx11<0x219>; defm V_MIN3_I32: VOP3_Realtriple_gfx11_gfx12<0x21a>; diff --git 
a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index b649bab532f262..94cd631590ae97 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -461,11 +461,11 @@ v_alignbyte_b32 v5, s1, v255, s3 v_alignbyte_b32 v5, s105, s105, s105 // GFX11: v_alignbyte_b32 v5, s105, s105, s105; encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] -v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 -// GFX11: v_alignbyte_b32 v5, vcc_lo
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen ready_for_review https://github.com/llvm/llvm-project/pull/119750 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC][CodeGen] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen edited https://github.com/llvm/llvm-project/pull/119750 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC][CodeGen] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen edited https://github.com/llvm/llvm-project/pull/119750 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC][CodeGen] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen ready_for_review https://github.com/llvm/llvm-project/pull/119750 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC][CodeGen] true16 for v_alignbyte_b32 (PR #119750)
@@ -2353,8 +2353,8 @@ def int_amdgcn_writelane : [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; -def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], +def int_amdgcn_alignbyte : DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], broxigarchen wrote: Sorry for the delay. I have updated the codegen pattern and reverted the intrinsic change. https://github.com/llvm/llvm-project/pull/119750 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/119750 >From 68ba9f46992b683dcff947e929e5050691acf0f1 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 12 Dec 2024 13:33:14 -0500 Subject: [PATCH 1/4] True16 for v_alignbyte_b32 in MC --- clang/lib/CodeGen/CGBuiltin.cpp | 8 llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td| 8 +++- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 11 +++-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s| 42 +-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 17 ++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s| 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 3 ++ .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 31 +++--- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 16 ++- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 36 +--- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 21 -- 15 files changed, 190 insertions(+), 43 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 7ec9d59bfed5cf..92e8ea9d23e12b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19792,6 +19792,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_alignbyte: { +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); +llvm::Function *F = +CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); +return Builder.CreateCall(F, {Src0, Src1, Src2}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out diff --git 
a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index f721d5267cd2a0..fbbf9f74cc9c63 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2353,7 +2353,7 @@ def int_amdgcn_writelane : >; def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable] >; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index c06c932a5375e8..a0d904ba9a3353 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -212,7 +212,11 @@ defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGP defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, fshr>; -defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; +defm V_ALIGNBYTE_B32 : VOP3Inst_t16_with_profiles <"v_alignbyte_b32", + VOP3_Profile, + VOP3_Profile_True16, + VOP3_Profile_Fake16, + int_amdgcn_alignbyte>; // XXX - No FPException seems suspect but manual doesn't say it does let mayRaiseFPException = 0 in { @@ -1690,7 +1694,7 @@ defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32: VOP3_Realtriple_gfx11_gfx12<0x216>; -defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>; +defm V_ALIGNBYTE_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x217, "v_alignbyte_b32">; defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>; defm V_MIN3_F32: VOP3_Realtriple_gfx11<0x219>; defm V_MIN3_I32: VOP3_Realtriple_gfx11_gfx12<0x21a>; diff --git 
a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index 40e3fbda47787a..c166cb58d5cb52 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -461,11 +461,11 @@ v_alignbyte_b32 v5, s1, v255, s3 v_alignbyte_b32 v5, s105, s105, s105 // GFX11: v_alignbyte_b32 v5, s105, s105, s105; encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] -v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 -// GFX11: v_alignbyte_b32 v5, vc
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen converted_to_draft https://github.com/llvm/llvm-project/pull/119750 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/119750 >From 68ba9f46992b683dcff947e929e5050691acf0f1 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 12 Dec 2024 13:33:14 -0500 Subject: [PATCH 1/4] True16 for v_alignbyte_b32 in MC --- clang/lib/CodeGen/CGBuiltin.cpp | 8 llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td| 8 +++- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 11 +++-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s| 42 +-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 17 ++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s| 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 3 ++ .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 31 +++--- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 16 ++- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 16 ++- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 36 +--- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 21 -- 15 files changed, 190 insertions(+), 43 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 7ec9d59bfed5cf1..92e8ea9d23e12b1 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19792,6 +19792,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_alignbyte: { +llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); +llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); +llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); +llvm::Function *F = +CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); +return Builder.CreateCall(F, {Src0, Src1, Src2}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out diff --git 
a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index f721d5267cd2a08..fbbf9f74cc9c633 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2353,7 +2353,7 @@ def int_amdgcn_writelane : >; def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable] >; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index c06c932a5375e81..a0d904ba9a3353e 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -212,7 +212,11 @@ defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGP defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, fshr>; -defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; +defm V_ALIGNBYTE_B32 : VOP3Inst_t16_with_profiles <"v_alignbyte_b32", + VOP3_Profile, + VOP3_Profile_True16, + VOP3_Profile_Fake16, + int_amdgcn_alignbyte>; // XXX - No FPException seems suspect but manual doesn't say it does let mayRaiseFPException = 0 in { @@ -1690,7 +1694,7 @@ defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32: VOP3_Realtriple_gfx11_gfx12<0x216>; -defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>; +defm V_ALIGNBYTE_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x217, "v_alignbyte_b32">; defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>; defm V_MIN3_F32: VOP3_Realtriple_gfx11<0x219>; defm V_MIN3_I32: VOP3_Realtriple_gfx11_gfx12<0x21a>; diff --git 
a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index 40e3fbda47787a6..c166cb58d5cb52d 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -461,11 +461,11 @@ v_alignbyte_b32 v5, s1, v255, s3 v_alignbyte_b32 v5, s105, s105, s105 // GFX11: v_alignbyte_b32 v5, s105, s105, s105; encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] -v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 -// GFX11: v_alignbyte_b3
[clang] [llvm] [AMDGPU][True16][MC] true16 for v_alignbyte_b32 (PR #119750)
@@ -2353,8 +2353,8 @@ def int_amdgcn_writelane : [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; -def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], +def int_amdgcn_alignbyte : DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], broxigarchen wrote: Hi @kosarev, could you comment on this implementation? We might need your input here. https://github.com/llvm/llvm-project/pull/119750 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Driver] support true16 target feature in clang driver (PR #133298)
https://github.com/broxigarchen edited https://github.com/llvm/llvm-project/pull/133298 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Clang] support true16 target feature in clang driver (PR #133298)
https://github.com/broxigarchen edited https://github.com/llvm/llvm-project/pull/133298 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Clang] support true16 target feature in clang driver (PR #133298)
https://github.com/broxigarchen edited https://github.com/llvm/llvm-project/pull/133298 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Clang] support true16 target feature in clang driver (PR #133298)
https://github.com/broxigarchen edited https://github.com/llvm/llvm-project/pull/133298 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Clang] support true16 target feature in clang driver (PR #133298)
https://github.com/broxigarchen edited https://github.com/llvm/llvm-project/pull/133298 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Driver] support true16 feature option in clang driver (PR #133298)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/133298 >From bd6300963b53ad8c5a4037e89e041fe1adfd3fe6 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 27 Mar 2025 14:41:40 -0400 Subject: [PATCH] add true16 feature in clang driver --- clang/include/clang/Driver/Options.td | 3 +++ clang/lib/Driver/ToolChains/AMDGPU.cpp | 4 clang/test/Driver/amdgpu-features.c| 5 + 3 files changed, 12 insertions(+) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a7fcb160d3867..68179d7835440 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5182,6 +5182,9 @@ defm wavefrontsize64 : SimpleMFlag<"wavefrontsize64", defm amdgpu_precise_memory_op : SimpleMFlag<"amdgpu-precise-memory-op", "Enable", "Disable", " precise memory mode (AMDGPU only)">; +defm real_true16: SimpleMFlag<"real-true16", + "enable real-true16 in GFX11Plus", "enable real-true16 mode", + " real-true16 mode (AMDGPU only)">; def munsafe_fp_atomics : Flag<["-"], "munsafe-fp-atomics">, Visibility<[ClangOption, CC1Option]>, Alias; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 1c5bb08568801..ce0cba5aa536a 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -707,6 +707,10 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D, options::OPT_mno_amdgpu_precise_memory_op, false)) Features.push_back("+precise-memory"); + if (Args.hasFlag(options::OPT_mreal_true16, options::OPT_mno_real_true16, + false)) +Features.push_back("+real-true16"); + handleTargetFeaturesGroup(D, Triple, Args, Features, options::OPT_m_amdgpu_Features_Group); } diff --git a/clang/test/Driver/amdgpu-features.c b/clang/test/Driver/amdgpu-features.c index 864744db203e9..f46f0251cadfc 100644 --- a/clang/test/Driver/amdgpu-features.c +++ b/clang/test/Driver/amdgpu-features.c @@ -38,3 +38,8 @@ // RUN: %clang -### -target amdgcn 
-mcpu=gfx1010 -mno-amdgpu-precise-memory-op %s 2>&1 | FileCheck --check-prefix=NO-PREC-MEM %s // NO-PREC-MEM-NOT: {{".*precise-memory"}} + +// RUN: %clang -### -target amdgcn -mcpu=gfx1100 -mreal-true16 %s 2>&1 | FileCheck --check-prefix=REAL16 %s +// RUN: %clang -### -target amdgcn -mcpu=gfx1100 -mno-real-true16 %s 2>&1 | FileCheck --check-prefix=NO-REAL16 %s +// REAL16: "-target-feature" "+real-true16" +// NO-REAL16-NOT: {{".*real-true16"}} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Driver] support true16 target feature in clang driver (PR #133298)
https://github.com/broxigarchen ready_for_review https://github.com/llvm/llvm-project/pull/133298 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Clang] support true16 target feature in clang driver (PR #133298)
https://github.com/broxigarchen closed https://github.com/llvm/llvm-project/pull/133298 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Driver] support true16 feature option in clang driver (PR #133298)
https://github.com/broxigarchen edited https://github.com/llvm/llvm-project/pull/133298 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Driver] support true16 feature option in clang driver (PR #133298)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/133298 >From a73229b233ab37e984932b99224b5aaf64b26747 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 27 Mar 2025 14:41:40 -0400 Subject: [PATCH] add true16 feature in clang driver --- clang/include/clang/Driver/Options.td | 3 +++ clang/lib/Driver/ToolChains/AMDGPU.cpp | 4 clang/test/Driver/amdgpu-features.c| 5 + 3 files changed, 12 insertions(+) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a7fcb160d3867..3e962d4fa0508 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5182,6 +5182,9 @@ defm wavefrontsize64 : SimpleMFlag<"wavefrontsize64", defm amdgpu_precise_memory_op : SimpleMFlag<"amdgpu-precise-memory-op", "Enable", "Disable", " precise memory mode (AMDGPU only)">; +defm real-true16: SimpleMFlag<"real-true16", + "enable real-true16 in GFX11Plus", "enable real-true16 mode", + " mode (AMDGPU only)">; def munsafe_fp_atomics : Flag<["-"], "munsafe-fp-atomics">, Visibility<[ClangOption, CC1Option]>, Alias; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 1c5bb08568801..0f2c4c34e91a4 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -707,6 +707,10 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D, options::OPT_mno_amdgpu_precise_memory_op, false)) Features.push_back("+precise-memory"); + if (Args.hasFlag(options::OPT_mrealtrue16, options::OPT_mno_realtrue16, + false)) +Features.push_back("+real-true16"); + handleTargetFeaturesGroup(D, Triple, Args, Features, options::OPT_m_amdgpu_Features_Group); } diff --git a/clang/test/Driver/amdgpu-features.c b/clang/test/Driver/amdgpu-features.c index 864744db203e9..dafff85f41cc6 100644 --- a/clang/test/Driver/amdgpu-features.c +++ b/clang/test/Driver/amdgpu-features.c @@ -38,3 +38,8 @@ // RUN: %clang -### -target amdgcn -mcpu=gfx1010 
-mno-amdgpu-precise-memory-op %s 2>&1 | FileCheck --check-prefix=NO-PREC-MEM %s // NO-PREC-MEM-NOT: {{".*precise-memory"}} + +// RUN: %clang -### -target amdgcn -mcpu=gfx1100 -mreal-true16 %s 2>&1 | FileCheck --check-prefix=REAL16 %s +// RUN: %clang -### -target amdgcn -mcpu=gfx1100 -mno-real-true16 %s 2>&1 | FileCheck --check-prefix=FAKE16 %s +// REAL16: "-target-feature" "+real-true16" +// FAKE16: {{".*real-true16"}} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Driver] support true16 feature option in clang driver (PR #133298)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/133298 >From a94164ac07df115a7169ff98bd77dbf15174f2ba Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 27 Mar 2025 14:41:40 -0400 Subject: [PATCH] add true16 feature in clang driver --- clang/include/clang/Driver/Options.td | 3 +++ clang/lib/Driver/ToolChains/AMDGPU.cpp | 4 clang/test/Driver/amdgpu-features.c| 5 + 3 files changed, 12 insertions(+) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a7fcb160d3867..decc9ed956a3e 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5182,6 +5182,9 @@ defm wavefrontsize64 : SimpleMFlag<"wavefrontsize64", defm amdgpu_precise_memory_op : SimpleMFlag<"amdgpu-precise-memory-op", "Enable", "Disable", " precise memory mode (AMDGPU only)">; +defm realtrue16: SimpleMFlag<"realtrue16", + "enable realtrue16 in GFX11Plus", "enable realtrue16 mode", + " mode (AMDGPU only)">; def munsafe_fp_atomics : Flag<["-"], "munsafe-fp-atomics">, Visibility<[ClangOption, CC1Option]>, Alias; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 1c5bb08568801..0f2c4c34e91a4 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -707,6 +707,10 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D, options::OPT_mno_amdgpu_precise_memory_op, false)) Features.push_back("+precise-memory"); + if (Args.hasFlag(options::OPT_mrealtrue16, options::OPT_mno_realtrue16, + false)) +Features.push_back("+real-true16"); + handleTargetFeaturesGroup(D, Triple, Args, Features, options::OPT_m_amdgpu_Features_Group); } diff --git a/clang/test/Driver/amdgpu-features.c b/clang/test/Driver/amdgpu-features.c index 864744db203e9..dafff85f41cc6 100644 --- a/clang/test/Driver/amdgpu-features.c +++ b/clang/test/Driver/amdgpu-features.c @@ -38,3 +38,8 @@ // RUN: %clang -### -target amdgcn -mcpu=gfx1010 
-mno-amdgpu-precise-memory-op %s 2>&1 | FileCheck --check-prefix=NO-PREC-MEM %s // NO-PREC-MEM-NOT: {{".*precise-memory"}} + +// RUN: %clang -### -target amdgcn -mcpu=gfx1100 -mreal-true16 %s 2>&1 | FileCheck --check-prefix=REAL16 %s +// RUN: %clang -### -target amdgcn -mcpu=gfx1100 -mno-real-true16 %s 2>&1 | FileCheck --check-prefix=FAKE16 %s +// REAL16: "-target-feature" "+real-true16" +// FAKE16: {{".*real-true16"}} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Driver] support true16 feature option in clang driver (PR #133298)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/133298 >From 4285b82cd89ad6c036c27bc392fb59474d15db0b Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 27 Mar 2025 14:41:40 -0400 Subject: [PATCH] add true16 feature in clang driver --- clang/include/clang/Driver/Options.td | 3 +++ clang/lib/Driver/ToolChains/AMDGPU.cpp | 4 clang/test/Driver/amdgpu-features.c| 5 + 3 files changed, 12 insertions(+) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a7fcb160d3867..68179d7835440 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5182,6 +5182,9 @@ defm wavefrontsize64 : SimpleMFlag<"wavefrontsize64", defm amdgpu_precise_memory_op : SimpleMFlag<"amdgpu-precise-memory-op", "Enable", "Disable", " precise memory mode (AMDGPU only)">; +defm real_true16: SimpleMFlag<"real-true16", + "enable real-true16 in GFX11Plus", "enable real-true16 mode", + " real-true16 mode (AMDGPU only)">; def munsafe_fp_atomics : Flag<["-"], "munsafe-fp-atomics">, Visibility<[ClangOption, CC1Option]>, Alias; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 1c5bb08568801..ce0cba5aa536a 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -707,6 +707,10 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D, options::OPT_mno_amdgpu_precise_memory_op, false)) Features.push_back("+precise-memory"); + if (Args.hasFlag(options::OPT_mreal_true16, options::OPT_mno_real_true16, + false)) +Features.push_back("+real-true16"); + handleTargetFeaturesGroup(D, Triple, Args, Features, options::OPT_m_amdgpu_Features_Group); } diff --git a/clang/test/Driver/amdgpu-features.c b/clang/test/Driver/amdgpu-features.c index 864744db203e9..999e4e7240cdd 100644 --- a/clang/test/Driver/amdgpu-features.c +++ b/clang/test/Driver/amdgpu-features.c @@ -38,3 +38,8 @@ // RUN: %clang -### -target amdgcn 
-mcpu=gfx1010 -mno-amdgpu-precise-memory-op %s 2>&1 | FileCheck --check-prefix=NO-PREC-MEM %s // NO-PREC-MEM-NOT: {{".*precise-memory"}} + +// RUN: %clang -### -target amdgcn -mcpu=gfx1100 -mrealtrue16 %s 2>&1 | FileCheck --check-prefix=REAL16 %s +// RUN: %clang -### -target amdgcn -mcpu=gfx1100 -mno-realtrue16 %s 2>&1 | FileCheck --check-prefix=FAKE16 %s +// REAL16: "-target-feature" "+real-true16" +// FAKE16: {{".*real-true16"}} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Driver] support true16 feature option in clang driver (PR #133298)
https://github.com/broxigarchen edited https://github.com/llvm/llvm-project/pull/133298 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Driver] support true16 feature option in clang driver (PR #133298)
https://github.com/broxigarchen edited https://github.com/llvm/llvm-project/pull/133298 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Driver] support true16 feature option in clang driver (PR #133298)
https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/133298 >From fee2c8dcfb01fbe5d8e08cf9ccf6e92f341d76f9 Mon Sep 17 00:00:00 2001 From: guochen2 Date: Thu, 27 Mar 2025 14:41:40 -0400 Subject: [PATCH] add true16 feature in clang driver --- clang/include/clang/Driver/Options.td | 3 +++ clang/lib/Driver/ToolChains/AMDGPU.cpp | 4 clang/test/Driver/amdgpu-features.c| 5 + 3 files changed, 12 insertions(+) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a7fcb160d3867..68179d7835440 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5182,6 +5182,9 @@ defm wavefrontsize64 : SimpleMFlag<"wavefrontsize64", defm amdgpu_precise_memory_op : SimpleMFlag<"amdgpu-precise-memory-op", "Enable", "Disable", " precise memory mode (AMDGPU only)">; +defm real_true16: SimpleMFlag<"real-true16", + "enable real-true16 in GFX11Plus", "enable real-true16 mode", + " real-true16 mode (AMDGPU only)">; def munsafe_fp_atomics : Flag<["-"], "munsafe-fp-atomics">, Visibility<[ClangOption, CC1Option]>, Alias; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 1c5bb08568801..ce0cba5aa536a 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -707,6 +707,10 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D, options::OPT_mno_amdgpu_precise_memory_op, false)) Features.push_back("+precise-memory"); + if (Args.hasFlag(options::OPT_mreal_true16, options::OPT_mno_real_true16, + false)) +Features.push_back("+real-true16"); + handleTargetFeaturesGroup(D, Triple, Args, Features, options::OPT_m_amdgpu_Features_Group); } diff --git a/clang/test/Driver/amdgpu-features.c b/clang/test/Driver/amdgpu-features.c index 864744db203e9..dafff85f41cc6 100644 --- a/clang/test/Driver/amdgpu-features.c +++ b/clang/test/Driver/amdgpu-features.c @@ -38,3 +38,8 @@ // RUN: %clang -### -target amdgcn 
-mcpu=gfx1010 -mno-amdgpu-precise-memory-op %s 2>&1 | FileCheck --check-prefix=NO-PREC-MEM %s // NO-PREC-MEM-NOT: {{".*precise-memory"}} + +// RUN: %clang -### -target amdgcn -mcpu=gfx1100 -mreal-true16 %s 2>&1 | FileCheck --check-prefix=REAL16 %s +// RUN: %clang -### -target amdgcn -mcpu=gfx1100 -mno-real-true16 %s 2>&1 | FileCheck --check-prefix=FAKE16 %s +// REAL16: "-target-feature" "+real-true16" +// FAKE16: {{".*real-true16"}} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Driver] support true16 feature option in clang driver (PR #133298)
https://github.com/broxigarchen edited https://github.com/llvm/llvm-project/pull/133298 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AMDGPU][True16][Driver] support true16 target feature in clang driver (PR #133298)
https://github.com/broxigarchen edited https://github.com/llvm/llvm-project/pull/133298 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits