Hello! The attached patch splits movdi_via_fpu into loaddi_via_fpu and storedi_via_fpu. As a consequence, independent insns can be scheduled between the load and the store, and the insn length is now calculated correctly.
2011-11-29 Uros Bizjak <ubiz...@gmail.com> * config/i386/sync.md (UNSPEC_LDA, UNSPEC_STA): New unspecs. (movdi_via_fpu): Remove. (loaddi_via_fpu): New insn pattern. (storedi_via_fpu): Ditto. (atomic_loaddi_fpu): Use loaddi_via_fpu and storedi_via_fpu. (atomic_storedi_fpu): Ditto. * reg-stack.c (get_true_reg): Handle UNSPEC_LDA. (subst_stack_regs_pat): Handle UNSPEC_STA. Tested on x86_64-pc-linux-gnu {,-m32}. Patch will be committed soon to SVN mainline. Uros.
Index: config/i386/sync.md =================================================================== --- config/i386/sync.md (revision 181804) +++ config/i386/sync.md (working copy) @@ -23,6 +23,8 @@ UNSPEC_SFENCE UNSPEC_MFENCE UNSPEC_MOVA ; For __atomic support + UNSPEC_LDA + UNSPEC_STA ]) (define_c_enum "unspecv" [ @@ -180,7 +182,10 @@ mem = dst; if (FP_REG_P (tmp)) - emit_insn (gen_movdi_via_fpu (mem, src, tmp)); + { + emit_insn (gen_loaddi_via_fpu (tmp, src)); + emit_insn (gen_storedi_via_fpu (mem, tmp)); + } else { adjust_reg_mode (tmp, DImode); @@ -258,7 +263,8 @@ if (FP_REG_P (tmp)) { - emit_insn (gen_movdi_via_fpu (dst, src, tmp)); + emit_insn (gen_loaddi_via_fpu (tmp, src)); + emit_insn (gen_storedi_via_fpu (dst, tmp)); DONE; } else @@ -276,16 +282,28 @@ ;; operations. But the fix_trunc patterns want way more setup than we want ;; to provide. Note that the scratch is DFmode instead of XFmode in order ;; to make it easy to allocate a scratch in either SSE or FP_REGs above. -(define_insn "movdi_via_fpu" + +(define_insn "loaddi_via_fpu" + [(set (match_operand:DF 0 "register_operand" "=f") + (unspec:DF [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_LDA))] + "TARGET_80387" + "fild%Z1\t%1" + [(set_attr "type" "fmov") + (set_attr "mode" "DF") + (set_attr "fp_int_src" "true")]) + +(define_insn "storedi_via_fpu" [(set (match_operand:DI 0 "memory_operand" "=m") - (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_MOVA)) - (clobber (match_operand:DF 2 "register_operand" "=f"))] + (unspec:DI [(match_operand:DF 1 "register_operand" "f")] UNSPEC_STA))] "TARGET_80387" - "fild%Z1\t%1\;fistp%Z0\t%0" - [(set_attr "type" "multi") - ;; Worst case based on full sib+offset32 addressing modes - (set_attr "length" "14")]) +{ + gcc_assert (find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != NULL_RTX); + return "fistp%Z0\t%0"; +} + [(set_attr "type" "fmov") + (set_attr "mode" "DI")]) + (define_expand "atomic_compare_and_swap<mode>" [(match_operand:QI 0 "register_operand" 
"") ;; bool success output (match_operand:SWI124 1 "register_operand" "") ;; oldval output Index: reg-stack.c =================================================================== --- reg-stack.c (revision 181804) +++ reg-stack.c (working copy) @@ -434,7 +434,8 @@ get_true_reg (rtx *pat) break; case UNSPEC: - if (XINT (*pat, 1) == UNSPEC_TRUNC_NOOP) + if (XINT (*pat, 1) == UNSPEC_TRUNC_NOOP + || XINT (*pat, 1) == UNSPEC_LDA) pat = & XVECEXP (*pat, 0, 0); return pat; @@ -1677,6 +1678,7 @@ subst_stack_regs_pat (rtx insn, stack regstack, rt case UNSPEC: switch (XINT (pat_src, 1)) { + case UNSPEC_STA: case UNSPEC_FIST: case UNSPEC_FIST_FLOOR: