Hello!

> Well you really want both the fpcr and the mxcsr registers, since the fpcr
> only controls the x87 and the mxcsr controls the xmm registers.  Note, in
> adding these registers, you are going to have to go through all of the 
> floating
> point patterns to add (use:HI FPCR_REG) and (use:SI MXCSR_REG) to each and
> every pattern so that the optimizer can be told not to move a floating point
> operation past the setting of the control word.

  I think that (use:...) clauses are needed only for (float)->(int) patterns
(fix_trunc.. & co.). For i386, we could calculate the new mode word in advance
(this calculation is inserted by LCM), and the fldcw insn is then inserted just
before fist/frndint.

;; x87 float -> integer truncating store, in the form used before reload.
;; It carries two HImode memory operands (2 and 3) as (use ...) clauses and has
;; no assembly of its own: once reload has completed it is split into
;;   1. a load of FPCR_REG from operand 3 (UNSPEC_FLDCW), and
;;   2. the actual conversion, which now only (use)s FPCR_REG.
;; Operand 2 is not referenced by the split sequence.
;; NOTE(review): presumably operand 2 is the saved control word and operand 3
;; the truncation-mode control word -- confirm against the expander.
(define_insn_and_split "fix_trunc<mode>_i387_2"
  [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
        (fix:X87MODEI12 (match_operand 1 "register_operand" "f")))
   (use (match_operand:HI 2 "memory_operand" "m"))
   (use (match_operand:HI 3 "memory_operand" "m"))]
  "TARGET_80387 && !TARGET_FISTTP
   && FLOAT_MODE_P (GET_MODE (operands[1]))
   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
  ;; "#" output template: this insn must be split before final output.
  "#"
  ;; Split condition: only after reload has completed.
  "reload_completed"
  ;; Replacement sequence: load the control word, then do the conversion.
  [(set (reg:HI FPCR_REG)
        (unspec:HI [(match_dup 3)] UNSPEC_FLDCW))
   (parallel [(set (match_dup 0) (fix:X87MODEI12 (match_dup 1)))
              (use (reg:HI FPCR_REG))])]
  ""
  [(set_attr "type" "fistp")
   ;; NOTE(review): "i387_cw" = "trunc" presumably marks the insn as needing the
   ;; truncating control-word mode for mode switching -- confirm.
   (set_attr "i387_cw" "trunc")
   (set_attr "mode" "<MODE>")])


;; x87 float -> integer truncating store that depends on FPCR_REG through a
;; (use ...) clause instead of carrying control-word memory operands.  This has
;; the same shape as the (parallel ...) produced by the split of
;; fix_trunc<mode>_i387_2.  The assembly text comes from the C helper
;; output_fix_trunc (defined outside this snippet; the meaning of its final
;; argument 0 is not visible here).
(define_insn "*fix_trunc<mode>_i387"
  [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
        (fix:X87MODEI12 (match_operand 1 "register_operand" "f")))
   (use (reg:HI FPCR_REG))]
  "TARGET_80387 && !TARGET_FISTTP
   && FLOAT_MODE_P (GET_MODE (operands[1]))
   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
  "* return output_fix_trunc (insn, operands, 0);"
  [(set_attr "type" "fistp")
   (set_attr "i387_cw" "trunc")
   (set_attr "mode" "<MODE>")])

I'm trying to use the MODE_ENTRY and MODE_EXIT macros to insert mode calculations
in the proper places. Currently, I have a somewhat working prototype that switches
between 2 modes: MODE_UNINITIALIZED, MODE_TRUNC (and MODE_ANY). The trick here
is that MODE_ENTRY and MODE_EXIT are defined to MODE_UNINITIALIZED. Secondly,
every asm statement and call insn switches to MODE_UNINITIALIZED, and when the
mode is switched _from_ MODE_TRUNC _to_ MODE_UNINITIALIZED before these two
statements (or in exit BBs), an UNSPEC_VOLATILE type fldcw is emitted (again via
LCM) that switches the fpu back to the saved mode. [UNSPEC_VOLATILE is needed to
prevent optimizers from removing this pattern]. So, 2 fldcw patterns are defined:

;; Load the x87 control word: sets FPCR_REG from an HImode memory operand and
;; emits "fldcw".  This variant wraps the source in a plain (non-volatile)
;; UNSPEC_FLDCW; it has the same shape as the first insn produced by the split
;; of fix_trunc<mode>_i387_2.
(define_insn "x86_fldcw_1"
  [(set (reg:HI FPCR_REG)
        (unspec:HI [(match_operand:HI 0 "memory_operand" "m")]
                     UNSPEC_FLDCW))]
  "TARGET_80387"
  "fldcw\t%0"
  [(set_attr "length" "2")
   (set_attr "mode" "HI")
   (set_attr "unit" "i387")
   (set_attr "athlon_decode" "vector")])

;; Load the x87 control word, volatile variant: identical to x86_fldcw_1 except
;; that the source is an unspec_volatile (UNSPECV_FLDCW).  It is the fldcw
;; emitted when switching from MODE_TRUNC back to MODE_UNINITIALIZED (before
;; calls/asm statements and in exit blocks) to restore the saved control word;
;; the volatile form keeps the optimizers from removing it.
(define_insn "x86_fldcw_2"
  [(set (reg:HI FPCR_REG)
        (unspec_volatile:HI [(match_operand:HI 0 "memory_operand" "m")]
                              UNSPECV_FLDCW))]
  "TARGET_80387"
  "fldcw\t%0"
  [(set_attr "length" "2")
   (set_attr "mode" "HI")
   (set_attr "unit" "i387")
   (set_attr "athlon_decode" "vector")])

By using this approach, testcase:

/* Testcase 1: double -> int conversion inside a loop.
   Stores x[i] converted to int into a[i] for i = 10 down to 1 (index 0 is
   not touched), then returns 0.  Both arrays must have at least 11
   elements.  */
int test (int *a, double *x) {
        int i;

        for (i = 10; i; i--) {
             /* Implicit double -> int conversion on every iteration.  */
             a[i] = x[i];
        }

        return 0;
}

is compiled (with -O2 -fomit-frame-pointer -fgcse-after-reload) into:

test:
        pushl  %ebx
        xorl %edx, %edx
        subl $4, %esp
        fnstcw 2(%esp)         <- store current cw
        movl 12(%esp), %ebx
        movl 16(%esp), %ecx
        movzwl 2(%esp), %eax
        orw  $3072, %ax
        movw %ax, (%esp)       <- store new cw
        .p2align 4,,15
.L2:
        fldcw  (%esp)          <- hello? gcse-after-reload?
        fldl 80(%ecx,%edx,8)
        fistpl 40(%ebx,%edx,4)
        decl %edx
        cmpl $-10, %edx
        jne  .L2
        fldcw  2(%esp)         <- volatile fldcw in exit block (load stored cw)
        xorl %eax, %eax
        popl %edx
        popl %ebx
        ret

Another testcase, involving call:

/* Testcase 2: double -> int conversions on both sides of a call.
   xxxx is an external function taking an int and returning a double.  */
extern double xxxx(int a);

/* The double argument is implicitly converted to int to call xxxx, and the
   double result is implicitly converted to int for the return value.  */
int test (double a) {
        return xxxx (a);
}

is compiled into:

test:
        subl $12, %esp
        fnstcw 10(%esp)        <- store current control word
        fldl 16(%esp)
        movzwl 10(%esp), %eax
        orw  $3072, %ax
        movw %ax, 8(%esp)
        fldcw  8(%esp)         <- switch fpu to new mode
        fistpl (%esp)          <- make conversion
        fldcw  10(%esp)        <- volatile fldcw before call (load stored cw)
        call xxxx
        fnstcw 10(%esp)        <- rewrite stored control word after call
        movzwl 10(%esp), %eax
        orw  $3072, %ax
        movw %ax, 8(%esp)
        fldcw  8(%esp)         <- load new
        fistpl 4(%esp)         <- make conversion
        movl 4(%esp), %eax
        fldcw  10(%esp)        <- volatile fldcw in exit block (load stored cw)
        addl $12, %esp
        ret

Because the ABI specifies that the control word should be restored to the saved
mode, we restore the saved cw before the call. After the call, the new control
word is saved again - because xxxx could be a cw-setting function, and the new
cw shouldn't be overwritten by the cw saved at the beginning of the function.

Unfortunately, in the first testcase, fldcw is not moved out of the loop, because
fix_trunc<mode>_i387_2 is split after the gcse-after-reload pass (Is this
intentional for the gcse-after-reload pass?)

Uros.

Reply via email to