Author: Jeffrey Byrnes
Date: 2022-10-14T09:52:11-07:00
New Revision: 85982d60133d2bfdabb33dbf95b1dce3f9754ae7
URL: https://github.com/llvm/llvm-project/commit/85982d60133d2bfdabb33dbf95b1dce3f9754ae7 DIFF: https://github.com/llvm/llvm-project/commit/85982d60133d2bfdabb33dbf95b1dce3f9754ae7.diff LOG: new selection patterns for load/store Added: Modified: llvm/lib/Target/AMDGPU/BUFInstructions.td llvm/lib/Target/AMDGPU/FLATInstructions.td llvm/lib/Target/AMDGPU/SIISelLowering.cpp Removed: ################################################################################ diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 09f3035c6215..47563dafe56c 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -949,6 +949,10 @@ defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX2", v2i32, load_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX3", v3i32, load_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>; +//defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i8, atomic_load_8_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", v2i8, atomic_load_16_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", v2i8, atomic_load_8_global>; + defm BUFFER_STORE_BYTE : MUBUF_Pseudo_Stores < "buffer_store_byte", i32, truncstorei8_global >; @@ -1758,6 +1762,15 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, zextloadi8_global>; defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_OFFSET, i16, load_global>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, v2i8, sextloadi8_constant>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, v2i8, extloadi8_constant>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, v2i8, zextloadi8_constant>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, v2i8, sextloadi8_global>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, v2i8, extloadi8_global>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, v2i8, zextloadi8_global>; +defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_OFFSET, v2i8, 
load_global>; +//defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i8, load_global>; + } // End OtherPredicates = [Has16BitInsts] multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen, @@ -1802,6 +1815,13 @@ defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, zextloadi16_private>; defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i16, load_private>; + +defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, v2i8, extloadi8_private>; +defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, v2i8, zextloadi8_private>; +defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, v2i8, sextloadi8_private>; +defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, v2i8, load_private>; +//defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i8, load_private>; + foreach vt = Reg32Types.types in { defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, vt, load_private>; } @@ -1847,6 +1867,9 @@ defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_SHORT_ADDR64, BUFFER_STORE_SHORT_ defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_SHORT_ADDR64, BUFFER_STORE_SHORT_OFFSET, i16, atomic_store_16_global>; defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, atomic_store_32_global>; defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, atomic_store_64_global>; +//defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_BYTE_ADDR64, BUFFER_STORE_BYTE_OFFSET, i8, atomic_store_8_global>; +defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_BYTE_ADDR64, BUFFER_STORE_BYTE_OFFSET, v2i8, atomic_store_8_global>; +defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_SHORT_ADDR64, BUFFER_STORE_SHORT_OFFSET, v2i8, atomic_store_16_global>; } // End Predicates 
= isGFX6GFX7 @@ -1861,6 +1884,9 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt, defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_global>; defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, i16, store_global>; +defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE_OFFSET, v2i8, truncstorei8_global>; +defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, v2i8, store_global>; +//defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE_OFFSET, i8, store_global>; multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen, MUBUF_Pseudo InstrOffset, @@ -1884,6 +1910,9 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i32, truncstorei16_private>; defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_private>; defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i16, store_private>; +defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, v2i8, truncstorei8_private>; +defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, v2i8, store_private>; +//defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i8, store_private>; foreach vt = Reg32Types.types in { defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, vt, store_private>; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index b7f9c558f83a..2f349d12167c 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -1106,14 +1106,21 @@ let OtherPredicates = [HasFlatAddressSpace] in { def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>; def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>; +//def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i8>; +def : FlatLoadPat <FLAT_LOAD_UBYTE, 
atomic_load_8_flat, v2i8>; def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>; def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>; +def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, v2i8>; def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>; def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>; def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>; def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>; def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>; def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>; +def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, v2i8>; +def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, v2i8>; +def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, v2i8>; +//def : FlatLoadPat <FLAT_LOAD_UBYTE, load_flat, i8>; def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>; def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>; def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>; @@ -1125,6 +1132,9 @@ def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>; def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>; def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>; +def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, v2i8>; +//def : FlatStorePat <FLAT_STORE_BYTE, store_flat, i8>; +def : FlatStorePat <FLAT_STORE_SHORT, store_flat, v2i8>; foreach vt = Reg32Types.types in { def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>; @@ -1150,6 +1160,10 @@ def : FlatStoreAtomicPat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>; def : FlatStoreAtomicPat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>; def : FlatStoreAtomicPat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>; +//def : FlatStoreAtomicPat <FLAT_STORE_BYTE, atomic_store_8_flat, i8>; +def : FlatStoreAtomicPat <FLAT_STORE_BYTE, atomic_store_8_flat, v2i8>; +def : FlatStoreAtomicPat <FLAT_STORE_SHORT, atomic_store_16_flat, v2i8>; + foreach as = [ "flat", "global" ] in { defm : FlatAtomicPat 
<"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>; defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>; @@ -1350,18 +1364,29 @@ let OtherPredicates = [HasFlatGlobalInsts] in { defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i32>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i16>; +//defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i8>; +defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, v2i8>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i32>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i16>; +defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, v2i8>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>; +defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, v2i8>; +defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, v2i8>; +defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, v2i8>; +//defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, load_global, i8>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>; defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>; +defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, v2i8>; + + + foreach vt = Reg32Types.types in { defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>; @@ -1392,6 +1417,11 @@ defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>; defm : 
GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>; defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>; +//defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, store_global, i8>; +defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, v2i8>; +defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, v2i8>; + + let OtherPredicates = [HasD16LoadStore] in { defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>; defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>; @@ -1417,6 +1447,12 @@ defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i32> defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>; defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i32>; defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i16>; + +//defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i8>; +defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, v2i8>; +defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, v2i8>; + + defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORD, atomic_store_32_global, i32>; defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, i64>; @@ -1521,6 +1557,13 @@ defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>; defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>; defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private, i16>; +defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, v2i8>; +defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, v2i8>; +defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, v2i8>; +defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private, v2i8>; +//defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, load_private, i8>; + + foreach vt = 
Reg32Types.types in { defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORD, load_private, vt>; defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>; @@ -1544,6 +1587,10 @@ defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, truncstorei16_private, i32>; defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, store_private, i16>; defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private, v3i32>; +//defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, store_private, i8>; +defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, v2i8>; +defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, store_private, v2i8>; + let OtherPredicates = [HasD16LoadStore, HasFlatScratchInsts, EnableFlatScratch] in { defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>; defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI, truncstorei8_hi16_private, i32>; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f9638eda346d..abb864c6a829 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -84,7 +84,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass); addRegisterClass(MVT::v4i8, &AMDGPU::SReg_32RegClass); - addRegisterClass(MVT::v2i8, &AMDGPU::SReg_32RegClass); + //addRegisterClass(MVT::v2i8, &AMDGPU::SReg_32RegClass); addRegisterClass(MVT::i8, &AMDGPU::SReg_32RegClass); addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass); @@ -543,6 +543,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::BSWAP, {MVT::i16, MVT::v2i16}, Legal); setOperationAction(ISD::BSWAP, MVT::v4i16, Custom); +/* + setOperationAction(ISD::STORE, MVT::v2i8, Promote); + AddPromotedToType(ISD::STORE, MVT::v2i8, MVT::i16); + setOperationAction(ISD::LOAD, MVT::v2i8, Promote); + AddPromotedToType(ISD::LOAD, MVT::v2i8, MVT::i16); +*/ + // XXX - Do these do anything? 
Vector constants turn into build_vector. setOperationAction(ISD::Constant, {MVT::v2i16, MVT::v2f16}, Legal); _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits