================
@@ -1590,10 +1590,14 @@ let OtherPredicates = [isGFX12Plus] in {
let WaveSizePredicate = isWave32 in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w32,
int_amdgcn_global_load_tr_b64, v2i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w32,
int_amdgcn_global_load_tr_b128, v8i16>;
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w32,
int_amdgcn_global_load_tr_b128, v8f16>;
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w32,
int_amdgcn_global_load_tr_b128, v8bf16>;
}
let WaveSizePredicate = isWave64 in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w64,
int_amdgcn_global_load_tr_b64, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w64,
int_amdgcn_global_load_tr_b128, v4i16>;
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w64,
int_amdgcn_global_load_tr_b128, v4f16>;
+ defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w64,
int_amdgcn_global_load_tr_b128, v4bf16>;
----------------
changpeng wrote:
Do you mean we should loop over [v8i16, v8f16, v8bf16]? Other types may not be
relevant to load transpose of 16-bit elements.
https://github.com/llvm/llvm-project/pull/99540
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits