Re: [Qemu-devel] [RFC] alpha qemu arithmetic exceptions

Al Viro Wed, 09 Jul 2014 15:03:41 -0700

On Tue, Jul 08, 2014 at 09:05:10AM +0100, Peter Maydell wrote:

> The code we have currently may well be buggy, but the correct


It is ;-/  We set TARGET_FPE_FLTINV unconditionally there.  BTW, what's
the reason why all these cpu_loop() instances can't go into
linux-user/<arch>/something?  Is that just because you have
static pthread_mutex_t cpu_list_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t exclusive_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t exclusive_cond = PTHREAD_COND_INITIALIZER;
static pthread_cond_t exclusive_resume = PTHREAD_COND_INITIALIZER;
static int pending_cpus;
and a bunch of inlines using them?  As it is, about three quarters of
linux-user/main.c consists of code under a series of arch ifdefs...

BTW, are there any more or less up-to-date docs on qemu profiling?  I mean,
things like perf/oprofile on the host obviously end up lumping all tcg
output together.  Is there any way to get information beyond "~40% of time
is spent in generated code, ~15% - in tb_find_fast(), and the rest is very
much colder"?

Incidentally, the combination of --enable-gprof and (default) --enable-pie
won't build - it dies with ld(1) complaining about relocs in gcrt1.o.
With --disable-pie it builds, but gprof of course has the same problem
as perf and friends - generated code is transient, so we get no details ;-/

> place to set si_code is (as Richard says) the Alpha cpu_loop() in
> linux-user/main.c, which has access to the trap type that just
> caused us to stop executing code, plus the CPUState, which
> should be enough information to set si_code correctly. In
> particular the GENTRAP case seems to be setting a variety
> of different si_code values for SIGFPE.

Sigh...  Well, having read through alpha_fp_emul() and the stuff it calls,
I understand why they hadn't implemented DNOD in any released hardware.
It's a bloody mess, with tons of interesting special cases.  E.g. adding
denorm to very large finite can push into overflow, with further effects
depending on whether we have overflow and/or denorm IEEE traps disabled,
etc.

Frankly, I suspect that it's better to have qemu-system-alpha behave like
the actual hardware does (including "FPCR.DNOD can't be set") and keep the
linux-user behaviour as is, for somebody brave and masochistic enough to
fight that one.  And no, it's nowhere near "just let denorms ride through
the normal softfloat code and play a bit with the flags it might raise".
And then there's netbsd/alpha and openbsd/alpha, so in theory somebody might
want to play with their software completion semantics (not identical to Linux
one) for the sake of yet-to-be-written bsd-user alpha support...

Anyway, how about the following delta?  AFAICS, it gets qemu-system-alpha
behaviour in sync with actual hardware without screwing qemu-alpha up.

diff --git a/target-alpha/fpu_helper.c b/target-alpha/fpu_helper.c
index 9b297de..30cbf02 100644
--- a/target-alpha/fpu_helper.c
+++ b/target-alpha/fpu_helper.c
@@ -44,6 +44,12 @@ uint32_t helper_fp_exc_get(CPUAlphaState *env)
     return get_float_exception_flags(&FP_STATUS);
 }
 
+enum {
+       Exc_Mask = float_flag_invalid | float_flag_int_overflow |
+                  float_flag_divbyzero | float_flag_overflow |
+                  float_flag_underflow | float_flag_inexact
+};
+
 static inline void fp_exc_raise1(CPUAlphaState *env, uintptr_t retaddr,
                                  uint32_t exc, uint32_t regno, uint32_t hw_exc)
 {
@@ -73,7 +79,7 @@ static inline void fp_exc_raise1(CPUAlphaState *env, uintptr_t retaddr,
    doesn't apply.  */
 void helper_fp_exc_raise(CPUAlphaState *env, uint32_t ignore, uint32_t regno)
 {
-    uint32_t exc = (uint8_t)env->fp_status.float_exception_flags;
+    uint32_t exc = (uint8_t)env->fp_status.float_exception_flags & Exc_Mask;
     if (exc) {
         env->fpcr_exc_status |= exc;
         exc &= ~ignore;
@@ -86,7 +92,7 @@ void helper_fp_exc_raise(CPUAlphaState *env, uint32_t ignore, uint32_t regno)
 /* Raise exceptions for ieee fp insns with software completion.  */
 void helper_fp_exc_raise_s(CPUAlphaState *env, uint32_t ignore, uint32_t regno)
 {
-    uint32_t exc = (uint8_t)env->fp_status.float_exception_flags;
+    uint32_t exc = (uint8_t)env->fp_status.float_exception_flags & Exc_Mask;
     if (exc) {
         env->fpcr_exc_status |= exc;
         exc &= ~ignore;
@@ -105,16 +111,14 @@ void helper_ieee_input(CPUAlphaState *env, uint64_t val)
     uint64_t frac = val & 0xfffffffffffffull;
 
     if (exp == 0) {
-        /* Denormals without DNZ set raise an exception.  */
-        if (frac != 0 && !env->fp_status.flush_inputs_to_zero) {
-            arith_excp(env, GETPC(), EXC_M_UNF, 0);
+        /* Denormals without /S raise an exception.  */
+        if (frac != 0) {
+            arith_excp(env, GETPC(), EXC_M_INV, 0);
         }
     } else if (exp == 0x7ff) {
         /* Infinity or NaN.  */
-        /* ??? I'm not sure these exception bit flags are correct.  I do
-           know that the Linux kernel, at least, doesn't rely on them and
-           just emulates the insn to figure out what exception to use.  */
-        arith_excp(env, GETPC(), frac ? EXC_M_INV : EXC_M_FOV, 0);
+        env->fpcr_exc_status |= float_flag_invalid;
+        arith_excp(env, GETPC(), EXC_M_INV, 0);
     }
 }
 
@@ -125,16 +129,34 @@ void helper_ieee_input_cmp(CPUAlphaState *env, uint64_t val)
     uint64_t frac = val & 0xfffffffffffffull;
 
     if (exp == 0) {
-        /* Denormals without DNZ set raise an exception.  */
-        if (frac != 0 && !env->fp_status.flush_inputs_to_zero) {
-            arith_excp(env, GETPC(), EXC_M_UNF, 0);
+        /* Denormals raise an exception.  */
+        if (frac != 0) {
+            arith_excp(env, GETPC(), EXC_M_INV, 0);
         }
     } else if (exp == 0x7ff && frac) {
         /* NaN.  */
+        env->fpcr_exc_status |= float_flag_invalid;
         arith_excp(env, GETPC(), EXC_M_INV, 0);
     }
 }
 
+/* Input handling with software completion.  Trap for denorms,
+   unless DNZ is set.  *IF* we try to support DNOD (which
+   none of the produced hardware did, AFAICS), we'll need
+   to suppress the trap when FPCR.DNOD is set; then the
+   code downstream of that will need to cope with denorms
+   sans flush_inputs_to_zero.  Most of it should work sanely,
+   but there's nothing to compare with...
+*/
+void helper_ieee_input_s(CPUAlphaState *env, uint64_t val)
+{
+    if (unlikely(2 * val - 1 < 0x1fffffffffffff)) {
+       if (!FP_STATUS.flush_inputs_to_zero) {
+           arith_excp(env, GETPC(), EXC_M_INV | EXC_M_SWC, 0);
+       }
+    }
+}
+
 /* F floating (VAX) */
 static uint64_t float32_to_f(float32 fa)
 {
@@ -707,7 +729,8 @@ static inline uint64_t inline_cvttq(CPUAlphaState *env, uint64_t a,
     frac = a & 0xfffffffffffffull;
 
     if (exp == 0) {
-        if (unlikely(frac != 0)) {
+        if (unlikely(frac != 0) && !FP_STATUS.flush_inputs_to_zero) {
+           /* not going to happen without working DNOD; ifdef out, perhaps? */
             goto do_underflow;
         }
     } else if (exp == 0x7ff) {
@@ -826,7 +849,9 @@ uint64_t helper_cvtqg(CPUAlphaState *env, uint64_t a)
 
 void helper_fcvtql_v_input(CPUAlphaState *env, uint64_t val)
 {
+    set_float_exception_flags(0, &FP_STATUS);
     if (val != (int32_t)val) {
-        arith_excp(env, GETPC(), EXC_M_IOV, 0);
+        float_raise(float_flag_int_overflow, &FP_STATUS);
+        env->fpcr_exc_status |= float_flag_inexact;
     }
 }
diff --git a/target-alpha/helper.c b/target-alpha/helper.c
index 6bcde21..21af702 100644
--- a/target-alpha/helper.c
+++ b/target-alpha/helper.c
@@ -157,7 +157,9 @@ void cpu_alpha_store_fpcr (CPUAlphaState *env, uint64_t val)
     }
     env->fpcr_dyn_round = t;
 
+#ifdef CONFIG_USER_ONLY
     env->fpcr_dnod = (val & FPCR_DNOD) != 0;
+#endif
     env->fpcr_undz = (val & FPCR_UNDZ) != 0;
     env->fpcr_flush_to_zero = env->fpcr_dnod & env->fpcr_undz;
     env->fp_status.flush_inputs_to_zero = (val & FPCR_DNZ) != 0;
diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index 2cc100b..596f24d 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -88,6 +88,7 @@ DEF_HELPER_FLAGS_3(fp_exc_raise_s, TCG_CALL_NO_WG, void, env, i32, i32)
 
 DEF_HELPER_FLAGS_2(ieee_input, TCG_CALL_NO_WG, void, env, i64)
 DEF_HELPER_FLAGS_2(ieee_input_cmp, TCG_CALL_NO_WG, void, env, i64)
+DEF_HELPER_FLAGS_2(ieee_input_s, TCG_CALL_NO_WG, void, env, i64)
 DEF_HELPER_FLAGS_2(fcvtql_v_input, TCG_CALL_NO_WG, void, env, i64)
 
 #if !defined (CONFIG_USER_ONLY)
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 6ea33f3..ad041b0 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -655,6 +655,10 @@ static TCGv gen_ieee_input(DisasContext *ctx, int reg, int fn11, int is_cmp)
             } else {
                 gen_helper_ieee_input(cpu_env, val);
             }
+        } else {
+#ifndef CONFIG_USER_ONLY
+            gen_helper_ieee_input_s(cpu_env, val);
+#endif
         }
     }
     return val;
@@ -2256,24 +2260,15 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn)
             gen_fcmov(ctx, TCG_COND_GT, ra, rb, rc);
             break;
         case 0x030:
-            /* CVTQL */
-            REQUIRE_REG_31(ra);
-            vc = dest_fpr(ctx, rc);
-            vb = load_fpr(ctx, rb);
-            gen_fcvtql(vc, vb);
-            break;
         case 0x130:
-            /* CVTQL/V */
         case 0x530:
-            /* CVTQL/SV */
+            /* CVTQL, CVTQL/V, CVTQL/SV */
             REQUIRE_REG_31(ra);
-            /* ??? I'm pretty sure there's nothing that /sv needs to do that
-               /v doesn't do.  The only thing I can think is that /sv is a
-               valid instruction merely for completeness in the ISA.  */
             vc = dest_fpr(ctx, rc);
             vb = load_fpr(ctx, rb);
             gen_helper_fcvtql_v_input(cpu_env, vb);
             gen_fcvtql(vc, vb);
+            gen_fp_exc_raise(rc, fn11);
             break;
         default:
             goto invalid_opc;

Re: [Qemu-devel] [RFC] alpha qemu arithmetic exceptions

Reply via email to