On Oct 17, 2013, at 1:46 AM, Richard Biener <rguent...@suse.de> wrote:
[This function shows another optimization issue:
case BOOLEAN_TYPE:
/* Cache false or true. */
limit = 2;
if (wi::leu_p (cst, 1))
ix = cst.to_uhwi ();
I would have expected cst <= 1 be optimized to cst.len == 1 &&
cst.val[0] <= 1.
So lts_p has the same problem, and is used just below. If we do:
@@ -1461,12 +1470,22 @@ wi::ne_p (const T1 &x, const T2 &y)
inline bool
wi::lts_p (const wide_int_ref &x, const wide_int_ref &y)
{
- if (x.precision <= HOST_BITS_PER_WIDE_INT
- && y.precision <= HOST_BITS_PER_WIDE_INT)
- return x.slow () < y.slow ();
- else
- return lts_p_large (x.val, x.len, x.precision, y.val, y.len,
- y.precision);
+ // We optimize w < N, where N is 64 or fewer bits.
+ if (y.precision <= HOST_BITS_PER_WIDE_INT)
+ {
+ // If it fits directly into a shwi, we can compare directly.
+ if (wi::fits_shwi_p (x))
+ return x.slow () < y.slow ();
+ // If it is doesn't fit, then it must be more negative than any
+ // value in y, and hence smaller.
+ if (neg_p (x, SIGNED))
+ return true;
+ // If it is positve, then it must be larger than any value in y,
+ // and hence greater than.
+ return false;
+ }
+ return lts_p_large (x.val, x.len, x.precision, y.val, y.len,
+ y.precision);
}
then for:
bool MGEN(wide_int cst) {
return wi::lts_p (cst, 100);
}
we generate:
movl 520(%rsp), %eax
cmpl $1, %eax
je .L5283
subl $1, %eax
movq 8(%rsp,%rax,8), %rax
shrq $63, %rax
ret
.p2align 4,,10
.p2align 3
.L5283:
cmpq $99, 8(%rsp)
setle %al
ret
which as you can see, never calls lts_p_large, and hence, if that was the only
reason the storage escapes, then it should be able to optimize better. In the
above code, minimal, no function calls, and the 100 is propagated all the way
down into the cmpq. Now, the reason why we should do it this way, most of the
time, most types are 64 bits or less. When that is true, then it will never
call into lts_p_large.
If the above 100 is changed to l, from a parameter long l, then the code is the
same except the last part is:
cmpq 8(%rsp), %rdi
setg %al
ret
If we have two wide_int's, then the code does this:
.cfi_startproc
subq $8, %rsp
.cfi_def_cfa_offset 16
movl 1052(%rsp), %r9d
movl 1048(%rsp), %r8d
movl 532(%rsp), %edx
movl 528(%rsp), %esi
cmpl $64, %r9d
ja .L5281
cmpl $1, %esi
je .L5284
subl $1, %esi
movq 16(%rsp,%rsi,8), %rax
addq $8, %rsp
.cfi_remember_state
.cfi_def_cfa_offset 8
shrq $63, %rax
ret
.p2align 4,,10
.p2align 3
.L5281:
.cfi_restore_state
leaq 536(%rsp), %rcx
leaq 16(%rsp), %rdi
call _ZN2wi11lts_p_largeEPKljjS1_jj
addq $8, %rsp
.cfi_remember_state
.cfi_def_cfa_offset 8
ret
.p2align 4,,10
.p2align 3
.L5284:
.cfi_restore_state
movq 536(%rsp), %rax
cmpq %rax, 16(%rsp)
setl %al
addq $8, %rsp
.cfi_def_cfa_offset 8
ret
.cfi_endproc
which is still pretty nice.
Does this look ok? Kenny, can you double check the cases, think I have them
right, but? a double check would be good.