LuoYuanke added inline comments.
================ Comment at: llvm/lib/Target/X86/X86LowerAMXType.cpp:265 + // If the dst type is <256 x i32>*, it is valid instruction. + // %0 = bitcast x86_amx* %tile to <256 x i32>* + // %1 = load <256 x i32>, <256 x i32>* %0, align 64 ---------------- pengfei wrote: > Where's `x86_amx* %tile` from? Shouldn't it have been transformed to `x86_amx` before > this bitcast if it exists? In my test case, the `x86_amx*` pointer bitcast is introduced by the "Combine redundant instructions" pass. The IR changes from ``` *** IR Dump After Simplify the CFG *** define internal fastcc void @_ZL12__tile_loaddP15__tile1024i_strPKvm(%struct.__tile1024i_str* nocapture %dst) unnamed_addr #4 { entry: %row = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 0 %0 = load i16, i16* %row, align 64, !tbaa !2 %col = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 1 %1 = load i16, i16* %col, align 2, !tbaa !7 %2 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %0, i16 %1, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 64) #6 %3 = bitcast x86_amx %2 to <256 x i32> %tile = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 3 store <256 x i32> %3, <256 x i32>* %tile, align 64, !tbaa !8 ret void } ``` to ``` *** IR Dump After Combine redundant instructions *** ; Function Attrs: alwaysinline nounwind uwtable mustprogress define internal fastcc void @_ZL12__tile_loaddP15__tile1024i_strPKvm(%struct.__tile1024i_str* nocapture %dst) unnamed_addr #4 { entry: %row = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 0 %0 = load i16, i16* %row, align 64, !tbaa !2 %col = getelementptr inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 1 %1 = load i16, i16* %col, align 2, !tbaa !7 %2 = call x86_amx @llvm.x86.tileloadd64.internal(i16 %0, i16 %1, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 64) #6 %tile = getelementptr 
inbounds %struct.__tile1024i_str, %struct.__tile1024i_str* %dst, i64 0, i32 3 %3 = bitcast <256 x i32>* %tile to x86_amx* store x86_amx %2, x86_amx* %3, align 64, !tbaa !8 ret void } ``` Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D91927/new/ https://reviews.llvm.org/D91927 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits