On Tue, Jul 08, 2014 at 09:05:10AM +0100, Peter Maydell wrote: > The code we have currently may well be buggy, but the correct
It is ;-/ We set TARGET_FPE_FLTINV unconditionally there. BTW, what's the reason why all these cpu_loop() instances can't go into linux-user/<arch>/something? Is that just because you have static pthread_mutex_t cpu_list_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t exclusive_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t exclusive_cond = PTHREAD_COND_INITIALIZER; static pthread_cond_t exclusive_resume = PTHREAD_COND_INITIALIZER; static int pending_cpus; and a bunch of inlines using them? As it is, about three quarters of linux-user/main.c consists of code under a series of arch ifdefs... BTW, are there any more or less up-to-date docs on qemu profiling? I mean, things like perf/oprofile on the host obviously end up lumping all tcg output together. Is there any way to get information beyond "~40% of time is spent in generated code, ~15% - in tb_find_fast(), and the rest is very much colder"? Incidentally, the combination of --enable-gprof and (default) --enable-pie won't build - it dies with ld(1) complaining about relocs in gcrt1.o. With --disable-pie it builds, but gprof of course has the same problem as perf and friends - generated code is transient, so we get no details ;-/ > place to set si_code is (as Richard says) the Alpha cpu_loop() in > linux-user/main.c, which has access to the trap type that just > caused us to stop executing code, plus the CPUState, which > should be enough information to set si_code correctly. In > particular the GENTRAP case seems to be setting a variety > of different si_code values for SIGFPE. Sigh... Well, having read through alpha_fp_emul() and the stuff it calls, I understand why they hadn't implemented DNOD in any released hardware. It's a bloody mess, with tons of interesting special cases. E.g. adding a denorm to a very large finite value can push into overflow, with further effects depending on whether we have overflow and/or denorm IEEE traps disabled, etc. 
Frankly, I suspect that it's better to have qemu-system-alpha behave like the actual hardware does (including "FPCR.DNOD can't be set") and keep the linux-user behaviour as is, for somebody brave and masochistic enough to fight that one. And no, it's nowhere near "just let denorms ride through the normal softfloat code and play a bit with the flags it might raise". And then there's netbsd/alpha and openbsd/alpha, so in theory somebody might want to play with their software completion semantics (not identical to Linux one) for the sake of yet-to-be-written bsd-user alpha support... Anyway, how about the following delta? AFAICS, it gets qemu-system-alpha behaviour in sync with actual hardware without screwing qemu-alpha up. diff --git a/target-alpha/fpu_helper.c b/target-alpha/fpu_helper.c index 9b297de..30cbf02 100644 --- a/target-alpha/fpu_helper.c +++ b/target-alpha/fpu_helper.c @@ -44,6 +44,12 @@ uint32_t helper_fp_exc_get(CPUAlphaState *env) return get_float_exception_flags(&FP_STATUS); } +enum { + Exc_Mask = float_flag_invalid | float_flag_int_overflow | + float_flag_divbyzero | float_flag_overflow | + float_flag_underflow | float_flag_inexact +}; + static inline void fp_exc_raise1(CPUAlphaState *env, uintptr_t retaddr, uint32_t exc, uint32_t regno, uint32_t hw_exc) { @@ -73,7 +79,7 @@ static inline void fp_exc_raise1(CPUAlphaState *env, uintptr_t retaddr, doesn't apply. */ void helper_fp_exc_raise(CPUAlphaState *env, uint32_t ignore, uint32_t regno) { - uint32_t exc = (uint8_t)env->fp_status.float_exception_flags; + uint32_t exc = (uint8_t)env->fp_status.float_exception_flags & Exc_Mask; if (exc) { env->fpcr_exc_status |= exc; exc &= ~ignore; @@ -86,7 +92,7 @@ void helper_fp_exc_raise(CPUAlphaState *env, uint32_t ignore, uint32_t regno) /* Raise exceptions for ieee fp insns with software completion. 
*/ void helper_fp_exc_raise_s(CPUAlphaState *env, uint32_t ignore, uint32_t regno) { - uint32_t exc = (uint8_t)env->fp_status.float_exception_flags; + uint32_t exc = (uint8_t)env->fp_status.float_exception_flags & Exc_Mask; if (exc) { env->fpcr_exc_status |= exc; exc &= ~ignore; @@ -105,16 +111,14 @@ void helper_ieee_input(CPUAlphaState *env, uint64_t val) uint64_t frac = val & 0xfffffffffffffull; if (exp == 0) { - /* Denormals without DNZ set raise an exception. */ - if (frac != 0 && !env->fp_status.flush_inputs_to_zero) { - arith_excp(env, GETPC(), EXC_M_UNF, 0); + /* Denormals without /S raise an exception. */ + if (frac != 0) { + arith_excp(env, GETPC(), EXC_M_INV, 0); } } else if (exp == 0x7ff) { /* Infinity or NaN. */ - /* ??? I'm not sure these exception bit flags are correct. I do - know that the Linux kernel, at least, doesn't rely on them and - just emulates the insn to figure out what exception to use. */ - arith_excp(env, GETPC(), frac ? EXC_M_INV : EXC_M_FOV, 0); + env->fpcr_exc_status |= float_flag_invalid; + arith_excp(env, GETPC(), EXC_M_INV, 0); } } @@ -125,16 +129,34 @@ void helper_ieee_input_cmp(CPUAlphaState *env, uint64_t val) uint64_t frac = val & 0xfffffffffffffull; if (exp == 0) { - /* Denormals without DNZ set raise an exception. */ - if (frac != 0 && !env->fp_status.flush_inputs_to_zero) { - arith_excp(env, GETPC(), EXC_M_UNF, 0); + /* Denormals raise an exception. */ + if (frac != 0) { + arith_excp(env, GETPC(), EXC_M_INV, 0); } } else if (exp == 0x7ff && frac) { /* NaN. */ + env->fpcr_exc_status |= float_flag_invalid; arith_excp(env, GETPC(), EXC_M_INV, 0); } } +/* Input handling with software completion. Trap for denorms, + unless DNZ is set. *IF* we try to support DNOD (which + none of the produced hardware did, AFAICS), we'll need + to suppress the trap when FPCR.DNOD is set; then the + code downstream of that will need to cope with denorms + sans flush_inputs_to_zero. 
Most of it should work sanely, + but there's nothing to compare with... +*/ +void helper_ieee_input_s(CPUAlphaState *env, uint64_t val) +{ + if (unlikely(2 * val - 1 < 0x1fffffffffffff)) { + if (!FP_STATUS.flush_inputs_to_zero) { + arith_excp(env, GETPC(), EXC_M_INV | EXC_M_SWC, 0); + } + } +} + /* F floating (VAX) */ static uint64_t float32_to_f(float32 fa) { @@ -707,7 +729,8 @@ static inline uint64_t inline_cvttq(CPUAlphaState *env, uint64_t a, frac = a & 0xfffffffffffffull; if (exp == 0) { - if (unlikely(frac != 0)) { + if (unlikely(frac != 0) && !FP_STATUS.flush_inputs_to_zero) { + /* not going to happen without working DNOD; ifdef out, perhaps? */ goto do_underflow; } } else if (exp == 0x7ff) { @@ -826,7 +849,9 @@ uint64_t helper_cvtqg(CPUAlphaState *env, uint64_t a) void helper_fcvtql_v_input(CPUAlphaState *env, uint64_t val) { + set_float_exception_flags(0, &FP_STATUS); if (val != (int32_t)val) { - arith_excp(env, GETPC(), EXC_M_IOV, 0); + float_raise(float_flag_int_overflow, &FP_STATUS); + env->fpcr_exc_status |= float_flag_inexact; } } diff --git a/target-alpha/helper.c b/target-alpha/helper.c index 6bcde21..21af702 100644 --- a/target-alpha/helper.c +++ b/target-alpha/helper.c @@ -157,7 +157,9 @@ void cpu_alpha_store_fpcr (CPUAlphaState *env, uint64_t val) } env->fpcr_dyn_round = t; +#ifdef CONFIG_USER_ONLY env->fpcr_dnod = (val & FPCR_DNOD) != 0; +#endif env->fpcr_undz = (val & FPCR_UNDZ) != 0; env->fpcr_flush_to_zero = env->fpcr_dnod & env->fpcr_undz; env->fp_status.flush_inputs_to_zero = (val & FPCR_DNZ) != 0; diff --git a/target-alpha/helper.h b/target-alpha/helper.h index 2cc100b..596f24d 100644 --- a/target-alpha/helper.h +++ b/target-alpha/helper.h @@ -88,6 +88,7 @@ DEF_HELPER_FLAGS_3(fp_exc_raise_s, TCG_CALL_NO_WG, void, env, i32, i32) DEF_HELPER_FLAGS_2(ieee_input, TCG_CALL_NO_WG, void, env, i64) DEF_HELPER_FLAGS_2(ieee_input_cmp, TCG_CALL_NO_WG, void, env, i64) +DEF_HELPER_FLAGS_2(ieee_input_s, TCG_CALL_NO_WG, void, env, i64) 
DEF_HELPER_FLAGS_2(fcvtql_v_input, TCG_CALL_NO_WG, void, env, i64) #if !defined (CONFIG_USER_ONLY) diff --git a/target-alpha/translate.c b/target-alpha/translate.c index 6ea33f3..ad041b0 100644 --- a/target-alpha/translate.c +++ b/target-alpha/translate.c @@ -655,6 +655,10 @@ static TCGv gen_ieee_input(DisasContext *ctx, int reg, int fn11, int is_cmp) } else { gen_helper_ieee_input(cpu_env, val); } + } else { +#ifndef CONFIG_USER_ONLY + gen_helper_ieee_input_s(cpu_env, val); +#endif } } return val; @@ -2256,24 +2260,15 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn) gen_fcmov(ctx, TCG_COND_GT, ra, rb, rc); break; case 0x030: - /* CVTQL */ - REQUIRE_REG_31(ra); - vc = dest_fpr(ctx, rc); - vb = load_fpr(ctx, rb); - gen_fcvtql(vc, vb); - break; case 0x130: - /* CVTQL/V */ case 0x530: - /* CVTQL/SV */ + /* CVTQL, CVTQL/V, CVTQL/SV */ REQUIRE_REG_31(ra); - /* ??? I'm pretty sure there's nothing that /sv needs to do that - /v doesn't do. The only thing I can think is that /sv is a - valid instruction merely for completeness in the ISA. */ vc = dest_fpr(ctx, rc); vb = load_fpr(ctx, rb); gen_helper_fcvtql_v_input(cpu_env, vb); gen_fcvtql(vc, vb); + gen_fp_exc_raise(rc, fn11); break; default: goto invalid_opc;