>From armv6, the architecture supports unaligned accesses. All we need to do is perform the correct alignment check in tcg_out_tlb_read and not use LDRD/STRD when the access is not aligned.
Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- tcg/arm/tcg-target.c.inc | 69 ++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 0c7e4f8411..c55167cc84 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -34,13 +34,6 @@ bool use_idiv_instructions; bool use_neon_instructions; #endif -/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined. */ -#ifdef CONFIG_SOFTMMU -# define USING_SOFTMMU 1 -#else -# define USING_SOFTMMU 0 -#endif - #ifdef CONFIG_DEBUG_TCG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", @@ -1526,15 +1519,20 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, int fast_off = TLB_MASK_TABLE_OFS(mem_index); int mask_off = fast_off + offsetof(CPUTLBDescFast, mask); int table_off = fast_off + offsetof(CPUTLBDescFast, table); - unsigned s_bits = opc & MO_SIZE; - unsigned a_bits = get_alignment_bits(opc); + unsigned s_mask = (1 << (opc & MO_SIZE)) - 1; + unsigned a_mask = (1 << get_alignment_bits(opc)) - 1; + TCGReg t_addr; /* - * We don't support inline unaligned acceses, but we can easily - * support overalignment checks. + * For v7, support for unaligned accesses is mandatory. + * For v6, support for unaligned accesses is enabled by SCTLR.U, + * which is enabled by (at least) Linux and FreeBSD. + * For v4 and v5, unaligned accesses are... complicated, and + * unhelped by Linux having a global not per-process flag + * for unaligned handling. */ - if (a_bits < s_bits) { - a_bits = s_bits; + if (!use_armv6_instructions && a_mask < s_mask) { + a_mask = s_mask; } /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */ @@ -1578,27 +1576,32 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, /* * Check alignment, check comparators. - * Do this in no more than 3 insns. Use MOVW for v7, if possible, + * Do this in 2-4 insns. Use MOVW for v7, if possible, * to reduce the number of sequential conditional instructions. * Almost all guests have at least 4k pages, which means that we need * to clear at least 9 bits even for an 8-byte memory, which means it * isn't worth checking for an immediate operand for BIC. */ + /* For unaligned accesses, test the page of the last byte. */ + t_addr = addrlo; + if (a_mask < s_mask) { + t_addr = TCG_REG_R0; + tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr, + addrlo, s_mask - a_mask); + } if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) { - tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1)); - - tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask); + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask)); tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP, - addrlo, TCG_REG_TMP, 0); + t_addr, TCG_REG_TMP, 0); tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0); } else { - if (a_bits) { - tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, - (1 << a_bits) - 1); + if (a_mask) { + tcg_debug_assert(a_mask <= 0xff); + tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask); } - tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, addrlo, + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr, SHIFT_IMM_LSR(TARGET_PAGE_BITS)); - tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP, + tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS)); } @@ -1763,8 +1766,9 @@ static inline void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc, tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend); break; case MO_Q: - /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions + /* LDRD requires alignment; double-check that. */ + if (use_armv6_instructions + && get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend); } else if (datalo != addend) { @@ -1806,8 +1810,9 @@ static inline void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0); break; case MO_Q: - /* Avoid ldrd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions + /* LDRD requires alignment; double-check that. */ + if (use_armv6_instructions + && get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0); } else if (datalo == addrlo) { @@ -1882,8 +1887,9 @@ static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, MemOp opc, tcg_out_st32_r(s, cond, datalo, addrlo, addend); break; case MO_64: - /* Avoid strd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions + /* STRD requires alignment; double-check that. */ + if (use_armv6_instructions + && get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_strd_r(s, cond, datalo, addrlo, addend); } else { @@ -1914,8 +1920,9 @@ static inline void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0); break; case MO_64: - /* Avoid strd for user-only emulation, to handle unaligned. */ - if (USING_SOFTMMU && use_armv6_instructions + /* STRD requires alignment; double-check that. */ + if (use_armv6_instructions + && get_alignment_bits(opc) >= MO_64 && (datalo & 1) == 0 && datahi == datalo + 1) { tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0); } else { -- 2.25.1