https://llvm.org/bugs/show_bug.cgi?id=31143
Bug ID: 31143 Summary: False dependence breaking fails for ROUNDSSr Product: libraries Version: trunk Hardware: PC OS: Linux Status: NEW Severity: normal Priority: P Component: Backend: X86 Assignee: unassignedb...@nondot.org Reporter: mku...@google.com CC: llvm-bugs@lists.llvm.org Classification: Unclassified target triple = "x86_64-pc-linux-gnu" define void @test(float* nocapture %a, <4 x float>* nocapture %b, i32 %k) { entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv %v = load float, float* %arrayidx, align 4 %floor = call float @floorf(float %v) %sub = fsub float %floor, %v %v1 = insertelement <4 x float> undef, float %sub, i32 0 %br = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> store volatile <4 x float> %br, <4 x float>* %b, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %k br i1 %exitcond, label %for.end, label %for.body for.end: ret void } declare float @floorf(float) nounwind readnone When compiled with -msse4.1 (to emit a roundss for the floorf) we get: .LBB0_1: # %for.body # =>This Inner Loop Header: Depth=1 movss (%rdi), %xmm0 # xmm0 = mem[0],zero,zero,zero roundss $9, %xmm0, %xmm1 subss %xmm0, %xmm1 shufps $0, %xmm1, %xmm1 # xmm1 = xmm1[0,0,0,0] movups %xmm1, (%rsi) addq $4, %rdi decl %edx jne .LBB0_1 # BB#2: # %for.end retq The false dependence on %xmm1 is not broken. As expected, adding the required xorps (zeroing %xmm1 before the roundss) makes this loop about 3 times faster. -- You are receiving this mail because: You are on the CC list for the bug.
_______________________________________________ llvm-bugs mailing list llvm-bugs@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs