This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rG0ad19a833177: [CUDA,NVPTX] Corrected fragment size for tf32 LD B matrix. (authored by JackAKirk, committed by tra).
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D118023/new/ https://reviews.llvm.org/D118023 Files: clang/lib/CodeGen/CGBuiltin.cpp Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -17190,7 +17190,7 @@ case NVPTX::BI__mma_tf32_m16n16k8_ld_a: return MMA_LDST(4, m16n16k8_load_a_tf32); case NVPTX::BI__mma_tf32_m16n16k8_ld_b: - return MMA_LDST(2, m16n16k8_load_b_tf32); + return MMA_LDST(4, m16n16k8_load_b_tf32); case NVPTX::BI__mma_tf32_m16n16k8_ld_c: return MMA_LDST(8, m16n16k8_load_c_f32);
Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -17190,7 +17190,7 @@ case NVPTX::BI__mma_tf32_m16n16k8_ld_a: return MMA_LDST(4, m16n16k8_load_a_tf32); case NVPTX::BI__mma_tf32_m16n16k8_ld_b: - return MMA_LDST(2, m16n16k8_load_b_tf32); + return MMA_LDST(4, m16n16k8_load_b_tf32); case NVPTX::BI__mma_tf32_m16n16k8_ld_c: return MMA_LDST(8, m16n16k8_load_c_f32);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits