Issue 172159
Summary [LLVM] LLVM fails to optimize out fptosi+select down to single cvttsd2si instruction on x86
Labels new issue
Assignees
Reporter johnplatts
    Here is a snippet that LLVM fails to optimize down to a single cvttsd2si instruction:
```
; Reproducer: converts a double to i32, substituting INT32_MIN (-2147483648)
; whenever |%f64_val| is not strictly below 2^31 (this includes NaN).
;
; The fptosi is evaluated unconditionally; for inputs that do not fit in i32
; its result is poison, but the select below discards it in exactly those
; cases, so the function's return value is always well defined.
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define i32 @X86ConvF64ValToI32(double %f64_val) local_unnamed_addr #0 {
  ; |x|, used only for the range test below.
  %abs_f64_val = tail call double @llvm.fabs.f64(double %f64_val)
  ; Truncating conversion of the original (signed) input, not of |x|.
  %f64_to_i32_result = fptosi double %f64_val to i32
  ; 0x41E0000000000000 is the double 2147483648.0 (2^31). "olt" is an ordered
  ; compare, so a NaN input makes this false.
  %result_is_in_range = fcmp olt double %abs_f64_val, 0x41E0000000000000
  ; In range -> converted value; otherwise (too large, or NaN) -> INT32_MIN.
  %result = select i1 %result_is_in_range, i32 %f64_to_i32_result, i32 -2147483648
  ret i32 %result
}

; Declaration of the fabs intrinsic called by @X86ConvF64ValToI32.
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare double @llvm.fabs.f64(double) #1

; Attribute group #0 is attached to the @X86ConvF64ValToI32 definition;
; attribute group #1 is attached to the @llvm.fabs.f64 declaration.
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
```

Here is the assembly code generated for the above LLVM IR code on x86_64:
```
# Constant pool entry 0: 128-bit mask with bit 63 of each 64-bit lane cleared.
# The compiler prints the bit pattern as a double NaN, but it is only used as
# the AND mask that implements fabs below.
.LCPI0_0:
 .quad   0x7fffffffffffffff              # double NaN
        .quad 0x7fffffffffffffff              # double NaN
# Constant pool entry 1: 2^31, the bound used by the range test.
.LCPI0_1:
        .quad 0x41e0000000000000              # double 2147483648
# Intel syntax (destination operand first). Input in xmm0, result in eax.
X86ConvF64ValToI32: # @X86ConvF64ValToI32
        # Truncating convert of the original input, done before xmm0 is masked.
        cvttsd2si       ecx, xmm0
        # xmm0 = |x| (clear the sign bit).
 andpd   xmm0, xmmword ptr [rip + .LCPI0_0]
        movsd   xmm1, qword ptr [rip + .LCPI0_1] # xmm1 = [2.147483648E+9,0.0E+0]
        # Compare 2^31 against |x|; an unordered (NaN) compare sets CF, so the
        # cmova below is not taken for NaN.
        ucomisd xmm1, xmm0
        # Default result: INT32_MIN.
        mov     eax, -2147483648
        # If 2^31 > |x|, use the converted value instead.
        cmova   eax, ecx
 ret
```

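The above X86ConvF64ValToI32 snippet can be optimized down to a single cvttsd2si instruction, because both the snippet and the F64->I32 form of cvttsd2si are equivalent to `((fabs(x) < 2147483648.0) ? (int32_t)x : (int32_t)-2147483648)`: cvttsd2si already returns the "integer indefinite" value 0x80000000 (-2147483648) when the input is NaN or the truncated result does not fit in an int32_t, which is exactly the value the select substitutes in those cases.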
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to