https://llvm.org/bugs/show_bug.cgi?id=31697
Bug ID: 31697 Summary: Bitcasts block copy forwarding Product: libraries Version: trunk Hardware: All OS: All Status: NEW Severity: normal Priority: P Component: Global Analyses Assignee: unassignedb...@nondot.org Reporter: arc...@gmail.com CC: llvm-bugs@lists.llvm.org Classification: Unclassified If you have a situation where memory pointed to by a readonly argument %x is copied to a local alloca %y and then %y is projected from, then LLVM normally eliminates the unnecessary copy. This IR: declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) declare void @llvm.lifetime.start(i64, i8*) declare void @llvm.lifetime.end(i64, i8*) define i32 @copy_then_extract({ i32,i32,i32,i32 }* noalias nocapture readonly dereferenceable(16) %x, i32 %n) { entry: %y = alloca { i32, i32, i32, i32 }, align 16 %x0p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %x, i32 0, i32 0 %x0 = load i32, i32* %x0p %x1p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %x, i32 0, i32 1 %x1 = load i32, i32* %x1p %x2p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %x, i32 0, i32 2 %x2 = load i32, i32* %x2p %x3p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %x, i32 0, i32 3 %x3 = load i32, i32* %x3p %yi8 = bitcast { i32, i32, i32, i32 }* %y to i8* call void @llvm.lifetime.start(i64 16, i8* %yi8) %y0p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %y, i32 0, i32 0 store i32 %x0, i32* %y0p %y1p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %y, i32 0, i32 1 store i32 %x1, i32* %y1p %y2p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %y, i32 0, i32 2 store i32 %x2, i32* %y2p %y3p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %y, i32 0, i32 3 store i32 %x3, i32* %y3p %ya = bitcast i8* %yi8 to i32* %ya2p = getelementptr inbounds i32, i32* %ya, i64 2 %y2 = load i32, i32* %ya2p %yi8_2 = bitcast 
{ i32, i32, i32, i32 }* %y to i8* call void @llvm.lifetime.end(i64 16, i8* %yi8_2) ret i32 %y2 } reduces to: define i32 @copy_then_extract({ i32, i32, i32, i32 }* noalias nocapture readonly dereferenceable(16) %x, i32 %n) local_unnamed_addr #0 { entry: %x2p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %x, i64 0, i32 2 %x2 = load i32, i32* %x2p, align 4 ret i32 %x2 } However, if %ya2p becomes a dynamic GEP by changing `i64 2` to `i32 %n`, then the bitcast can no longer be reduced, since a non-constant index is not valid when indexing into a struct type. This ends up completely blocking copy forwarding. This IR: declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) declare void @llvm.lifetime.start(i64, i8*) declare void @llvm.lifetime.end(i64, i8*) define i32 @copy_then_extract({ i32,i32,i32,i32 }* noalias nocapture readonly dereferenceable(16) %x, i32 %n) { entry: %y = alloca { i32, i32, i32, i32 }, align 16 %x0p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %x, i32 0, i32 0 %x0 = load i32, i32* %x0p %x1p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %x, i32 0, i32 1 %x1 = load i32, i32* %x1p %x2p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %x, i32 0, i32 2 %x2 = load i32, i32* %x2p %x3p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %x, i32 0, i32 3 %x3 = load i32, i32* %x3p %yi8 = bitcast { i32, i32, i32, i32 }* %y to i8* call void @llvm.lifetime.start(i64 16, i8* %yi8) %y0p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %y, i32 0, i32 0 store i32 %x0, i32* %y0p %y1p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %y, i32 0, i32 1 store i32 %x1, i32* %y1p %y2p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %y, i32 0, i32 2 store i32 %x2, i32* %y2p %y3p = getelementptr inbounds { i32, i32, i32, i32 }, { i32, i32, i32, i32 }* %y, i32 0, i32 3 store i32 %x3, i32* %y3p %ya = 
bitcast i8* %yi8 to i32* %ya2p = getelementptr inbounds i32, i32* %ya, i32 %n %y2 = load i32, i32* %ya2p %yi8_2 = bitcast { i32, i32, i32, i32 }* %y to i8* call void @llvm.lifetime.end(i64 16, i8* %yi8_2) ret i32 %y2 } still keeps the unnecessary copy in the optimized IR: define i32 @copy_then_extract({ i32, i32, i32, i32 }* noalias nocapture readonly dereferenceable(16) %x, i32 %n) local_unnamed_addr #1 { entry: %y = alloca <4 x i32>, align 16 %0 = bitcast { i32, i32, i32, i32 }* %x to <4 x i32>* %1 = load <4 x i32>, <4 x i32>* %0, align 4 %yi8 = bitcast <4 x i32>* %y to i8* call void @llvm.lifetime.start(i64 16, i8* %yi8) store <4 x i32> %1, <4 x i32>* %y, align 16 %2 = sext i32 %n to i64 %ya2p = getelementptr inbounds <4 x i32>, <4 x i32>* %y, i64 0, i64 %2 %y2 = load i32, i32* %ya2p, align 4 call void @llvm.lifetime.end(i64 16, i8* %yi8) ret i32 %y2 } -- You are receiving this mail because: You are on the CC list for the bug.
_______________________________________________ llvm-bugs mailing list llvm-bugs@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs