On Mon, Aug 8, 2011 at 5:30 PM, Ulrich Weigand <uweig...@de.ibm.com> wrote:
> Uros Bizjak wrote:
>
>> Although it would be nice for reload to subsequently fix CSE'd
>> non-offsettable memory by copying the address to a temporary reg (*as
>> said in the documentation*), we could simply require an XMM temporary
>> for TImode reloads to/from integer registers, and this fixes the ICE
>> for x32.
>
> Moves are special as far as reload is concerned.  If there is already
> a move instruction present *before* reload, it will get fixed up
> according to its constraints, like any other instruction.
>
> However, reload will *introduce* new moves as part of its operation,
> and those will *not* themselves get reloaded.  Instead, reload simply
> assumes that every plain move will just succeed without requiring
> any reload; if this is not true, the target *must* provide a
> secondary reload for this move.
>
> (Note that the secondary reload could also work by reloading the
> target address into a temporary; that's up to the target to
> implement.)

Whoa, indeed.

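To spell out what happens in the x32 case: the new move that reload
introduces is a plain TImode move between a pair of general registers
and a memory reference whose address is a zero-extended pointer,
roughly

  (set (reg:TI ...) (mem:TI (zero_extend:DI (reg:SI ...))))

(an illustration of the shape, not an actual RTL dump).  The +8 needed
to reach the high word cannot be folded into an address of that form,
so the memory reference is not offsettable, and since the move was
introduced by reload itself it won't get fixed up by its constraints;
the backend has to handle it via a secondary reload.
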
Using the attached patch, which reloads the memory address instead of
going through an XMM register, the code for the testcase (a rough
reconstruction follows the two listings below) improves from:

test:
.LFB0:
        .cfi_startproc
        pushq   %rbx
        .cfi_def_cfa_offset 16
        .cfi_offset 3, -16
        sall    $4, %esi
        addl    %edi, %esi
        movdqa  (%esi), %xmm0
        movdqa  %xmm0, -16(%rsp)
        movq    -16(%rsp), %rcx
        movq    -8(%rsp), %rbx
        addq    $1, %rcx
        adcq    $0, %rbx
        movq    %rcx, -16(%rsp)
        sall    $4, %edx
        movq    %rbx, -8(%rsp)
        movdqa  -16(%rsp), %xmm0
        movdqa  %xmm0, (%esi)
        pxor    %xmm0, %xmm0
        movdqa  %xmm0, (%edx,%esi)
        popq    %rbx
        .cfi_def_cfa_offset 8
        ret

to:

test:
.LFB0:
        .cfi_startproc
        sall    $4, %esi
        pushq   %rbx
        .cfi_def_cfa_offset 16
        .cfi_offset 3, -16
        addl    %edi, %esi
        pxor    %xmm0, %xmm0
        mov     %esi, %eax
        movq    (%rax), %rcx
        movq    8(%rax), %rbx
        addq    $1, %rcx
        adcq    $0, %rbx
        sall    $4, %edx
        movq    %rcx, (%rax)
        movq    %rbx, 8(%rax)
        movdqa  %xmm0, (%edx,%esi)
        popq    %rbx
        .cfi_def_cfa_offset 8
        ret

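For reference, the testcase itself isn't quoted in this message; a
rough reconstruction from the assembly above (types, names and
signedness are guesses) would be something along the lines of:

void
test (__int128 *p, int i, int j)
{
  p[i] += 1;      /* 128-bit read-modify-write through p + i*16 */
  p[i + j] = 0;   /* 16-byte zero store at p + (i+j)*16 */
}

compiled for the x32 ABI, e.g. with -O2 -mx32.
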
H.J., can you please test the attached patch? This optimization won't
trigger on x86_64 anymore.

Thanks,
Uros.
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 177565)
+++ config/i386/i386.md (working copy)
@@ -2073,6 +2073,40 @@
        (const_string "orig")))
    (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,TI,DI,TI,DI,DI,DI,DI,DI")])
 
+;; Reload patterns to support multi-word load/store
+;; with non-offsettable address.
+(define_expand "reload_noff_store"
+  [(parallel [(match_operand 0 "memory_operand" "=m")
+              (match_operand 1 "register_operand" "r")
+              (match_operand:DI 2 "register_operand" "=&r")])]
+  "TARGET_64BIT"
+{
+  rtx mem = operands[0];
+  rtx addr = XEXP (mem, 0);
+
+  emit_move_insn (operands[2], addr);
+  mem = replace_equiv_address_nv (mem, operands[2]);
+
+  emit_insn (gen_rtx_SET (VOIDmode, mem, operands[1]));
+  DONE;
+})
+
+(define_expand "reload_noff_load"
+  [(parallel [(match_operand 0 "register_operand" "=r")
+              (match_operand 1 "memory_operand" "m")
+              (match_operand:DI 2 "register_operand" "=r")])]
+  "TARGET_64BIT"
+{
+  rtx mem = operands[1];
+  rtx addr = XEXP (mem, 0);
+
+  emit_move_insn (operands[2], addr);
+  mem = replace_equiv_address_nv (mem, operands[2]);
+
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], mem));
+  DONE;
+})
+
 ;; Convert impossible stores of immediate to existing instructions.
 ;; First try to get scratch register and go through it.  In case this
 ;; fails, move by 32bit parts.
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 177566)
+++ config/i386/i386.c  (working copy)
@@ -28245,18 +28245,25 @@
 
 static reg_class_t
 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
-                      enum machine_mode mode,
-                      secondary_reload_info *sri ATTRIBUTE_UNUSED)
+                      enum machine_mode mode, secondary_reload_info *sri)
 {
   /* Double-word spills from general registers to non-offsettable memory
-     references (zero-extended addresses) go through XMM register.  */
+     references (zero-extended addresses) require special handling.  */
   if (TARGET_64BIT
       && MEM_P (x)
       && GET_MODE_SIZE (mode) > UNITS_PER_WORD
       && rclass == GENERAL_REGS
       && !offsettable_memref_p (x))
-    return SSE_REGS;
+    {
+      sri->icode = (in_p
+                   ? CODE_FOR_reload_noff_load
+                   : CODE_FOR_reload_noff_store);
+      /* Add the cost of move to a temporary.  */
+      sri->extra_cost = 1;
 
+      return NO_REGS;
+    }
+
   /* QImode spills from non-QI registers require
      intermediate register on 32bit targets.  */
   if (!TARGET_64BIT
