Re: SH optimized software floating point routines

Christian Bruel Thu, 22 Jul 2010 06:58:26 -0700

oops, resending it with a small typo fix (a branch became delayed :-().

Just in case we accept that SNaNs and QNaNs are not exclusive and mimic the C model, here is a synthetic illustrative test case:


Compile with

sh-superh-elf-gcc -O2 -mieee -m4-nofpu snan.c snan2.c -g -o l.u ;sh-superh-elf-run l.u ; echo $?


Original 4.6 fp-bit C model:
OK

Using the ieee-sf.S implementation:
FAIL

Using the ieee-sf.S + this patch
OK

same for sh4-linux.

Best Regards,

Christian



Christian Bruel wrote:

Christian Bruel wrote:
Hi Kaz,

Kaz Kojima wrote:
BTW, it looks that softfp __unord?f2 routines check signaling NaNs
only.  This makes __builtin_isnan return false for quiet NaNs for
which current fp-bit ones return true when -mieee enabled.  Perhaps
that change of behavior might be OK for software FP.
I use the attached patch to handle the QNaNs in the assembly soft-fp. It needs to be updated for trunk (and the dates in the changelogs need updating). Will do.
Edited to apply on top of Joern's latest patch. Certainly not optimal, but it fixes the QNaN checks for builtins and inlined unordered comparisons for -mieee or -fno-finite-math-only.
Best Regards

Christian

diff '--exclude=.svn' '--exclude=*.rej' '--exclude=*~' -ubrN 
gnu_trunk.ref/gcc/gcc/config/sh/ieee-754-df.S 
gnu_trunk/gcc/gcc/config/sh/ieee-754-df.S
--- gnu_trunk.ref/gcc/gcc/config/sh/ieee-754-df.S       2010-07-21 
18:04:17.949950000 +0200
+++ gnu_trunk/gcc/gcc/config/sh/ieee-754-df.S   2010-07-21 18:09:10.602376000 
+0200
@@ -92,11 +92,12 @@
        HIDDEN_FUNC(GLOBAL(nedf2))
 GLOBAL(nedf2):
        cmp/eq  DBL0L,DBL1L
-       mov.l   LOCAL(c_DF_NAN_MASK),r1
-       bf LOCAL(ne)
+       bf.s    LOCAL(ne)
+       mov     #1,r0
        cmp/eq  DBL0H,DBL1H
+       mov.l   LOCAL(c_DF_NAN_MASK),r1
+       bt.s    LOCAL(check_nan)
        not     DBL0H,r0
-       bt      LOCAL(check_nan)
        mov     DBL0H,r0
        or      DBL1H,r0
        add     r0,r0
@@ -104,11 +105,17 @@
        or      DBL0L,r0
 LOCAL(check_nan):
        tst     r1,r0
-       rts
+       bt.s    LOCAL(nan)
+       mov     #12,r2
+       shll16  r2
+       xor     r2,r1
+       tst     r1,r0
+LOCAL(nan):    
        movt    r0
 LOCAL(ne):
        rts
-       mov #1,r0
+       nop
+       
        .balign 4
 LOCAL(c_DF_NAN_MASK):
        .long DF_NAN_MASK
diff '--exclude=.svn' '--exclude=*.rej' '--exclude=*~' -ubrN 
gnu_trunk.ref/gcc/gcc/config/sh/ieee-754-sf.S 
gnu_trunk/gcc/gcc/config/sh/ieee-754-sf.S
--- gnu_trunk.ref/gcc/gcc/config/sh/ieee-754-sf.S       2010-07-22 
14:21:50.606831000 +0200
+++ gnu_trunk/gcc/gcc/config/sh/ieee-754-sf.S   2010-07-22 15:30:17.928097000 
+0200
@@ -58,6 +58,12 @@
        add     r0,r0
 LOCAL(check_nan):
        tst     r1,r0
+       bt.s    LOCAL(nan)
+       mov     #96,r2
+       shll16  r2
+       xor     r2,r1
+       tst     r1,r0   
+ LOCAL(nan):                   
        rts
        movt    r0
        .balign 4
diff '--exclude=.svn' '--exclude=*.rej' '--exclude=*~' -ubrN 
gnu_trunk.ref/gcc/gcc/config/sh/sh.md gnu_trunk/gcc/gcc/config/sh/sh.md
--- gnu_trunk.ref/gcc/gcc/config/sh/sh.md       2010-07-21 18:06:25.978547000 
+0200
+++ gnu_trunk/gcc/gcc/config/sh/sh.md   2010-07-22 09:13:12.599669000 +0200
@@ -10262,6 +10262,7 @@
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))
    (clobber (reg:SI R1_REG))
+   (clobber (reg:SI R2_REG))
    (use (match_operand:SI 1 "arith_reg_operand" "r"))]
   "TARGET_SH1 && ! TARGET_SH2E"
   "jsr @%1%#"
@@ -10337,13 +10338,18 @@
 
 (define_insn "cmpunsf_i1"
   [(set (reg:SI T_REG)
-       (unordered:SI (match_operand:SF 0 "arith_reg_operand" "r,r")
-                     (match_operand:SF 1 "arith_reg_operand" "r,r")))
-   (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
-   (clobber (match_scratch:SI 3 "=0,&r"))]
+       (unordered:SI (match_operand:SF 0 "arith_reg_operand" "r")
+                     (match_operand:SF 1 "arith_reg_operand" "r")))
+     (use (match_operand:SI 2 "arith_reg_operand" "r"))
+     (clobber (match_scratch:SI 3 "=&r"))]
   "TARGET_SH1 && ! TARGET_SH2E"
-  "not\t%0,%3\;tst\t%2,%3\;not\t%1,%3\;bt\t0f\;tst\t%2,%3\;0:"
-  [(set_attr "length" "10")])
+    "not\t%0,%3\;tst\t%2,%3\;bt.s\t0f
+    \tnot\t%1,%3\;tst\t%2,%3\;bt.s\t0f
+    \tmov\t#96,%3\;shll16\t%3\;xor\t%3,%2
+    \tnot\t%0,%3\;tst\t%2,%3\;bt.s\t0f
+    \tnot\t%1,%3\;tst\t%2,%3
+     0:"
+    [(set_attr "length" "28")])
 
 ;; ??? This is a lot of code with a lot of branches; a library function
 ;; might be better.
@@ -11069,6 +11075,7 @@
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))
    (clobber (reg:SI R1_REG))
+   (clobber (reg:SI R2_REG))
    (use (match_operand:SI 1 "arith_reg_operand" "r"))]
   "TARGET_SH1_SOFTFP"
   "jsr @%1%#"
@@ -11093,6 +11100,7 @@
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))
    (clobber (reg:SI R1_REG))
+   (clobber (reg:SI R2_REG))
    (use (match_operand:SI 1 "arith_reg_operand" "r"))]
   "TARGET_SH1_SOFTFP"
   "jsr @%1%#"
@@ -11110,13 +11118,18 @@
 
 (define_insn "cmpundf_i1"
   [(set (reg:SI T_REG)
-       (unordered:SI (match_operand:DF 0 "arith_reg_operand" "r,r")
-                     (match_operand:DF 1 "arith_reg_operand" "r,r")))
-   (use (match_operand:SI 2 "arith_reg_operand" "r,r"))
-   (clobber (match_scratch:SI 3 "=0,&r"))]
+       (unordered:SI (match_operand:DF 0 "arith_reg_operand" "r")
+                     (match_operand:DF 1 "arith_reg_operand" "r")))
+   (use (match_operand:SI 2 "arith_reg_operand" "r"))
+   (clobber (match_scratch:SI 3 "=&r"))]
   "TARGET_SH1 && ! TARGET_SH2E"
-  "not\t%S0,%3\;tst\t%2,%3\;not\t%S1,%3\;bt\t0f\;tst\t%2,%3\;0:"
-  [(set_attr "length" "10")])
+   "not\t%S0,%3\;tst\t%2,%3\;bt.s\t0f
+  \tnot\t%S1,%3\;tst\t%2,%3\;bt.s\t0f
+  \tmov\t#12,%3\;shll16\t%3\;xor\t%3,%2
+  \tnot\t%S0,%3\;tst\t%2,%3\;bt.s\t0f
+  \tnot\t%S1,%3\;tst\t%2,%3
+0:"
+  [(set_attr "length" "28")])
 
 ;; ??? This is a lot of code with a lot of branches; a library function
 ;; might be better.

#include <stdlib.h>

extern int misnanf(float v);
extern int eqnf(float f3, float f4);

/* Test driver: checks that the out-of-line comparison helpers misnanf()
   and eqnf() give the expected answers for a signaling NaN, a quiet NaN
   and two ordinary values.  Calls abort() on the first failed
   expectation and returns 0 when every check passes.  */
int main(void)
{
  float signaling_nan = __builtin_nansf("");
  float quiet_nan = __builtin_nanf("");
  float two = 2.0;
  float three_point_three = 3.3;

  /* Both kinds of NaN must compare unequal to themselves; an ordinary
     number must not.  Short-circuit evaluation keeps the calls in the
     order: signaling NaN, quiet NaN, ordinary value.  */
  if (! misnanf (signaling_nan)
      || ! misnanf (quiet_nan)
      || misnanf (two))
    abort();

  /* eqnf() reports inequality: it must be nonzero for two distinct
     values and zero when a value is compared with itself.  */
  if (! eqnf (two, three_point_three)
      || eqnf (three_point_three, three_point_three))
    abort();

  return 0;
}

/* Returns 1 when f3 and f4 compare unequal with `!=`, and 0 otherwise.
   Note that, despite the name, a nonzero result means "not equal".
   NOTE(review): presumably kept as a separate function so the compiler
   cannot fold the float comparison at compile time -- confirm against
   the build command.  */
int eqnf(float f3, float f4)
{
  if (f3 != f4)
    return 1;

  return 0;
}

/* Manual NaN test: returns 1 when v compares unequal to itself, and 0
   otherwise.  The self-comparison with `!=` is deliberate; it is the
   operation this test program is exercising, so it is not replaced by
   a library NaN predicate.  */
int misnanf(float v)
{
  int differs_from_itself = (v != v);

  return differs_from_itself;
}

Re: SH optimized software floating point routines

Reply via email to