(this is mostly to get some ideas going rather than trying to get anything upstream... yet!)
/*
 * Experimental version of tcg_out_mov for i386's tcg-target.c.
 *
 * It filters out the final mov of the pattern
 *
 *     mov %ebx, %edx
 *     mov %ebx, [some index]
 *     mov %edx, %ebx
 *
 * i.e. a register-to-register mov, a single store to memory, and then a mov
 * that copies the value straight back.  The third instruction is redundant
 * because both registers already hold the same value.
 *
 * There are no benchmarks yet, but it does remove a few movs and qemu can
 * still load and run a Linux kernel.  It would be easier to do more
 * optimizations like this if qemu recorded its output at the assembly
 * instruction level instead of requiring the emitted bytes to be re-read.
 *
 * NOTE(review): the check cannot see whether a branch target lies between
 * the two movs, and the state below is file-global rather than tied to a
 * particular TCGContext or code buffer -- confirm both are acceptable for
 * the callers before relying on this.
 */

enum {
    LMOV_RR_LEN    = 2,     /* bytes skipped past the recorded mov; the
                             * original code used the same fixed offset.
                             * Assumes tcg_out_modrm(s, 0x8b, r, rm) emits
                             * opcode + ModRM only -- TODO confirm. */
    LMOV_STORE_OPC = 0x89   /* x86 "mov r/m32, r32" (store form) */
};

/*
 * Address of the most recent mov actually emitted by tcg_out_mov, or NULL
 * when there is nothing that is safe to match against.
 */
static uint8_t *lmovloc = NULL;

/*
 * Operands of that mov, stored swapped so that a later request for the
 * reverse copy compares equal directly: lmovret holds the old source and
 * lmovarg holds the old destination.
 */
static int lmovret = -1, lmovarg = -1;

/*
 * Return the encoded length in bytes of the 32-bit "mov r/m32, r32"
 * instruction starting at insn, provided its destination is memory.
 * Return 0 if the bytes in [insn, end) are not such an instruction (wrong
 * opcode, register destination, or not enough bytes to decode ModRM/SIB).
 */
static inline int lmov_store_len(const uint8_t *insn, const uint8_t *end)
{
    long avail = end - insn;
    int len = 2;                        /* opcode + ModRM */
    unsigned mod, rm;

    if (avail < 2 || insn[0] != LMOV_STORE_OPC) {
        return 0;
    }

    mod = insn[1] >> 6;
    rm = insn[1] & 7;

    if (mod == 3) {
        /* Register destination: this would overwrite a register. */
        return 0;
    }

    if (rm == 4) {
        /* A SIB byte follows ModRM. */
        if (avail < 3) {
            return 0;
        }
        len += 1;
        if (mod == 0 && (insn[2] & 7) == 5) {
            len += 4;                   /* no base register: disp32 */
        }
    } else if (mod == 0 && rm == 5) {
        len += 4;                       /* absolute disp32 address */
    }

    if (mod == 1) {
        len += 1;                       /* disp8 */
    } else if (mod == 2) {
        len += 4;                       /* disp32 */
    }

    return len;
}

/*
 * Emit "mov arg -> ret" unless ret and arg are the same register or the mov
 * is provably redundant.
 *
 * The mov is treated as redundant when the previous mov emitted here was the
 * exact reverse copy and the only bytes emitted since then form exactly one
 * register-to-memory store, which leaves both registers unchanged.
 *
 * s:   code generation context; s->code_ptr is the current output position.
 * ret: destination register number.
 * arg: source register number.
 */
static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
{
    int redundant = 0;

    if (arg == ret) {
        return;
    }

    /* Only inspect emitted bytes when a real mov has been recorded; this
     * also avoids doing pointer arithmetic on a null lmovloc. */
    if (lmovloc != NULL && lmovret == ret && lmovarg == arg) {
        long gap = s->code_ptr - lmovloc;

        if (gap > LMOV_RR_LEN) {
            int store_len = lmov_store_len(lmovloc + LMOV_RR_LEN, s->code_ptr);

            /* The store must account for every byte emitted since the mov. */
            redundant = (store_len != 0 && gap == LMOV_RR_LEN + store_len);
        }
    }

    if (redundant) {
        qemu_log("removed\n");
        /* Nothing was emitted, so there is no mov at code_ptr to remember.
         * Drop the state instead of recording a phantom instruction that a
         * later call would mis-decode. */
        lmovloc = NULL;
        lmovret = -1;
        lmovarg = -1;
    } else {
        lmovloc = s->code_ptr;
        tcg_out_modrm(s, 0x8b, ret, arg);
        lmovret = arg;
        lmovarg = ret;
    }
}