Low-hanging fruit: use gvec to move 16 bytes.

Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
---
 target/i386/cpu.h           | 4 ++--
 target/i386/tcg/translate.c | 7 +++----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 81e5abed86..dbc9a99a3b 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1587,8 +1587,8 @@ typedef struct CPUArchState {
     float_status mmx_status; /* for 3DNow! float ops */
     float_status sse_status;
     uint32_t mxcsr;
-    ZMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32];
-    ZMMReg xmm_t0;
+    ZMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32] QEMU_ALIGNED(16);
+    ZMMReg xmm_t0 QEMU_ALIGNED(16);
     MMXReg mmx_t0;
 
     uint64_t opmask_regs[NB_OPMASK_REGS];
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index b7972f0ff5..c1f1f6f66b 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -23,6 +23,7 @@
 #include "disas/disas.h"
 #include "exec/exec-all.h"
 #include "tcg/tcg-op.h"
+#include "tcg/tcg-op-gvec.h"
 #include "exec/cpu_ldst.h"
 #include "exec/translator.h"
 
@@ -2753,10 +2754,8 @@ static inline void gen_sto_env_A0(DisasContext *s, int offset)
 
 static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
 {
-    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
-    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
-    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
+    int xmm_ofs = offsetof(ZMMReg, ZMM_X(0));
+    tcg_gen_gvec_mov(MO_64, d_offset + xmm_ofs, s_offset + xmm_ofs, 16, 16);
 }
 
 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
-- 
2.34.1