On Sun, Jan 19, 2014 at 1:55 AM, Uros Bizjak <ubiz...@gmail.com> wrote: > On Sat, Jan 18, 2014 at 9:15 PM, H.J. Lu <hjl.to...@gmail.com> wrote: >> For LEA operation with SImode_address_operand, which zero-extends SImode >> to DImode, ix86_split_lea_for_addr turns >> >> (set (reg:DI) ...) >> >> into >> >> (set (reg:SI) ...) >> >> We need to do >> >> (set (reg:DI) (zero_extend:DI (reg:SI))) >> >> at the end. If the LEA operation is >> >> (set (reg:DI) (zero_extend:DI (reg:SI))) > > ree pass should remove these. However, we can just emit zero-extend > insn at the end of sequence, and ree (which is located after > post-reload split) should handle it: > > --cut here-- > Index: config/i386/i386.md > =================================================================== > --- config/i386/i386.md (revision 206753) > +++ config/i386/i386.md (working copy) > @@ -5428,12 +5428,17 @@ > operands[0] = SET_DEST (pat); > operands[1] = SET_SRC (pat); > > - /* Emit all operations in SImode for zero-extended addresses. Recall > - that x86_64 inheretly zero-extends SImode operations to DImode. */ > + /* Emit all operations in SImode for zero-extended addresses. */ > if (SImode_address_operand (operands[1], VOIDmode)) > mode = SImode; > > ix86_split_lea_for_addr (curr_insn, operands, mode); > + > + /* Zero-extend return register to DImode for zero-extended addresses. */ > + if (mode != <MODE>mode) > + emit_insn (gen_zero_extendsidi2 > + (operands[0], gen_lowpart ((mode), operands[0]))); > + > DONE; > } > [(set_attr "type" "lea") > --cut here-- > > The patch was tested with a testcase from Comment #9 of the PR using > "-O --march=corei7 -mtune=slm", and resulting binary runs without > problems. >
Yes, the resulting GCC works correctly. However, we generate extra (set (reg:DI) (zero_extend:DI (reg:SI))) It is because we generate (set (reg:SI) (reg:SI) (set (reg:DI) (zero_extend:DI (reg:SI))) REE pass doesn't know (set (reg:SI) (reg:SI) has an implicit ZERO_EXTEND. Here is a testcase: ---foo.c--- extern __thread unsigned int __bid_IDEC_glbflags; typedef unsigned long long UINT64; typedef __attribute__ ((aligned(16))) struct { UINT64 w[2]; } UINT128; extern UINT64 __bid64_from_uint64 (UINT64); extern void __bid_round64_2_18 (int q, int x, UINT64 C, UINT64 * ptr_Cstar, int *delta_exp, int *ptr_is_midpoint_lt_even, int *ptr_is_midpoint_gt_even, int *ptr_is_inexact_lt_midpoint, int *ptr_is_inexact_gt_midpoint); extern void __bid_round128_19_38 (int q, int x, UINT128 C, UINT128 * ptr_Cstar, int *delta_exp, int *ptr_is_midpoint_lt_even, int *ptr_is_midpoint_gt_even, int *ptr_is_inexact_lt_midpoint, int *ptr_is_inexact_gt_midpoint); UINT64 __bid64_from_uint64 (UINT64 x) { UINT64 res; UINT128 x128, res128; unsigned int q, ind; int incr_exp = 0; int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0; int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; if (x <= 0x002386F26FC0ffffull) { if (x < 0x0020000000000000ull) { res = 0x31c0000000000000ull | x; } else { res = 0x6c70000000000000ull | (x & 0x0007ffffffffffffull); } } else { if (x < 0x16345785d8a0000ull) { q = 17; ind = 1; } else if (x < 0xde0b6b3a7640000ull) { q = 18; ind = 2; } else if (x < 0x8ac7230489e80000ull) { q = 19; ind = 3; } else { q = 20; ind = 4; } if (q <= 19) { __bid_round64_2_18 ( q, ind, x, &res, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); } else { x128.w[1] = 0x0; x128.w[0] = x; __bid_round128_19_38 (q, ind, x128, &res128, &incr_exp, &is_midpoint_lt_even, &is_midpoint_gt_even, &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); res = res128.w[0]; } if (incr_exp) ind++; if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || is_midpoint_lt_even || is_midpoint_gt_even) *&__bid_IDEC_glbflags |= 0x00000020; if (res < 0x0020000000000000ull) { res = (((UINT64) ind + 398) << 53) | res; } else { res = 0x6000000000000000ull | (((UINT64) ind + 398) << 51) | (res & 0x0007ffffffffffffull); } } return(res);; } ----------- Compiling with -fPIC -O2, the differences between your patch and mine are --- bad.s 2014-01-19 06:10:28.006570325 -0800 +++ foo.s 2014-01-19 06:11:46.117754696 -0800 @@ -84,19 +84,18 @@ __bid64_from_uint64: movabsq $9007199254740991, %rax cmpq %rax, %rbx jbe .L23 - movl %ebp, %edx leaq 88(%rsp), %rsp .cfi_remember_state .cfi_def_cfa_offset 24 movabsq $2251799813685247, %rax - movl %edx, %edx + movl %ebp, %edx andq %rbx, %rax - movabsq $6917529027641081856, %rcx popq %rbx .cfi_def_cfa_offset 16 + movabsq $6917529027641081856, %rcx addq $398, %rdx - orq %rcx, %rax salq $51, %rdx + orq %rcx, %rax popq %rbp .cfi_def_cfa_offset 8 orq %rdx, %rax @@ -154,7 +153,6 @@ __bid64_from_uint64: leaq 88(%rsp), %rsp .cfi_remember_state .cfi_def_cfa_offset 24 - movl %eax, %eax addq $398, %rax salq $53, %rax orq %rbx, %rax My patch removes 2 extra (set (reg:DI) (zero_extend:DI (reg:SI))) -- H.J.