These use the same opcode as EMMS, which I guess makes some sort of sense. Fairly straightforward other than that.
If we were wanting to optimize out gen_clear_ymmh then this would be one of the starting points. Signed-off-by: Paul Brook <p...@nowt.org> --- target/i386/ops_sse.h | 48 ++++++++++++++++++++++++++++++++++++ target/i386/ops_sse_header.h | 9 +++++++ target/i386/tcg/translate.c | 26 ++++++++++++++++--- 3 files changed, 80 insertions(+), 3 deletions(-) diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index ad3312d353..a1f50f0c8b 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -3071,6 +3071,54 @@ void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, #endif #endif +#if SHIFT == 2 +void helper_vzeroall(CPUX86State *env) +{ + int i; + + for (i = 0; i < 8; i++) { + env->xmm_regs[i].ZMM_Q(0) = 0; + env->xmm_regs[i].ZMM_Q(1) = 0; + env->xmm_regs[i].ZMM_Q(2) = 0; + env->xmm_regs[i].ZMM_Q(3) = 0; + } +} + +void helper_vzeroupper(CPUX86State *env) +{ + int i; + + for (i = 0; i < 8; i++) { + env->xmm_regs[i].ZMM_Q(2) = 0; + env->xmm_regs[i].ZMM_Q(3) = 0; + } +} + +#ifdef TARGET_X86_64 +void helper_vzeroall_hi8(CPUX86State *env) +{ + int i; + + for (i = 8; i < 16; i++) { + env->xmm_regs[i].ZMM_Q(0) = 0; + env->xmm_regs[i].ZMM_Q(1) = 0; + env->xmm_regs[i].ZMM_Q(2) = 0; + env->xmm_regs[i].ZMM_Q(3) = 0; + } +} + +void helper_vzeroupper_hi8(CPUX86State *env) +{ + int i; + + for (i = 8; i < 16; i++) { + env->xmm_regs[i].ZMM_Q(2) = 0; + env->xmm_regs[i].ZMM_Q(3) = 0; + } +} +#endif +#endif + #undef SSE_HELPER_S #undef SHIFT diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h index cfcfba154b..48f0945917 100644 --- a/target/i386/ops_sse_header.h +++ b/target/i386/ops_sse_header.h @@ -411,6 +411,15 @@ DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env, Reg, Reg, i32) DEF_HELPER_5(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, Reg, i32) #endif +#if SHIFT == 2 +DEF_HELPER_1(vzeroall, void, env) +DEF_HELPER_1(vzeroupper, void, env) +#ifdef TARGET_X86_64 +DEF_HELPER_1(vzeroall_hi8, void, env) 
+DEF_HELPER_1(vzeroupper_hi8, void, env) +#endif +#endif + #undef SHIFT #undef Reg #undef SUFFIX diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index bcd6d47fd0..ba70aeb039 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -3455,9 +3455,29 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, return; } if (b == 0x77) { - /* emms */ - gen_helper_emms(cpu_env); - return; + if (s->prefix & PREFIX_VEX) { + CHECK_AVX(s); + if (s->vex_l) { + gen_helper_vzeroall(cpu_env); +#ifdef TARGET_X86_64 + if (CODE64(s)) { + gen_helper_vzeroall_hi8(cpu_env); + } +#endif + } else { + gen_helper_vzeroupper(cpu_env); +#ifdef TARGET_X86_64 + if (CODE64(s)) { + gen_helper_vzeroupper_hi8(cpu_env); + } +#endif + } + return; + } else { + /* emms */ + gen_helper_emms(cpu_env); + return; + } } /* prepare MMX state (XXX: optimize by storing fptt and fptags in the static cpu state) */ -- 2.36.0