| Issue |
172159
|
| Summary |
[LLVM] LLVM fails to optimize fptosi+select down to a single cvttsd2si instruction on x86
|
| Labels |
new issue
|
| Assignees |
|
| Reporter |
johnplatts
|
Here is a snippet that LLVM fails to optimize down to a single cvttsd2si instruction:
```
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; Converts %f64_val to i32, but returns -2147483648 whenever |%f64_val| is not
; ordered-less-than 2147483648.0 (which includes the case where %f64_val is NaN).
define i32 @X86ConvF64ValToI32(double %f64_val) local_unnamed_addr #0 {
; |%f64_val|, used only by the range check below.
%abs_f64_val = tail call double @llvm.fabs.f64(double %f64_val)
; The conversion is performed unconditionally; its result is used only when the
; range check below passes.
%f64_to_i32_result = fptosi double %f64_val to i32
; 0x41E0000000000000 is the double 2147483648.0 (2^31). "olt" is an ordered
; compare, so it yields false if %abs_f64_val is NaN.
%result_is_in_range = fcmp olt double %abs_f64_val, 0x41E0000000000000
; In range: the converted value. Out of range or NaN: -2147483648.
%result = select i1 %result_is_in_range, i32 %f64_to_i32_result, i32 -2147483648
ret i32 %result
}
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare double @llvm.fabs.f64(double) #1
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
```
Here is the assembly code generated for the above LLVM IR code on x86_64:
```
# 128-bit constant: both 64-bit lanes have every bit set except bit 63 (the sign
# bit). It is the andpd mask used below to clear the sign of the input.
.LCPI0_0:
.quad 0x7fffffffffffffff # double NaN
.quad 0x7fffffffffffffff # double NaN
# 64-bit constant: the bound that the masked input is compared against below.
.LCPI0_1:
.quad 0x41e0000000000000 # double 2147483648
# Reads the double in xmm0 and leaves the 32-bit result in eax:
# the truncated conversion if |xmm0| < 2147483648.0, otherwise -2147483648.
X86ConvF64ValToI32: # @X86ConvF64ValToI32
# ecx = truncating double -> int32 conversion of the input, done before xmm0 is modified.
cvttsd2si ecx, xmm0
# Clear the sign bit: xmm0 = |input|.
andpd xmm0, xmmword ptr [rip + .LCPI0_0]
movsd xmm1, qword ptr [rip + .LCPI0_1] # xmm1 = [2.147483648E+9,0.0E+0]
# Compare 2147483648.0 (xmm1) against |input| (xmm0); an unordered (NaN) compare
# sets ZF, PF and CF.
ucomisd xmm1, xmm0
# Fallback result; mov leaves the flags from ucomisd intact.
mov eax, -2147483648
# "Above" needs CF=0 and ZF=0, i.e. 2147483648.0 > |input|; not taken for NaN.
cmova eax, ecx
ret
```
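The above X86ConvF64ValToI32 snippet can be optimized down to a single cvttsd2si instruction, because both the snippet and the F64->I32 form of cvttsd2si compute `((fabs(x) < 2147483648.0) ? (int32_t)x : (int32_t)-2147483648)`: cvttsd2si itself returns the integer indefinite value 0x80000000 (-2147483648) when the input is NaN or its truncated value does not fit in 32 bits.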
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs