This makes CPUTLBEntry agnostic to the address size of the guest. When 32-bit addresses are in effect, we can simply read the low 32 bits of the 64-bit field. Similarly when we need to update the field for setting TLB_NOTDIRTY.
For TCG backends that could in theory be big-endian, but in practice are not (arm, loongarch, riscv), use QEMU_BUILD_BUG_ON to document and ensure this is not accidentally missed. For s390x, which is always big-endian, use HOST_BIG_ENDIAN anyway, to document the reason for the adjustment. For sparc64 and ppc64, always perform a 64-bit load, and rely on the following 32-bit comparison to ignore the high bits. Rearrange mips and ppc if ladders for clarity. Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- include/exec/cpu-defs.h | 37 +++++++++++--------------------- include/exec/cpu_ldst.h | 19 ++++++++++------ accel/tcg/cputlb.c | 8 +++++-- tcg/aarch64/tcg-target.c.inc | 1 + tcg/arm/tcg-target.c.inc | 1 + tcg/loongarch64/tcg-target.c.inc | 1 + tcg/mips/tcg-target.c.inc | 13 ++++++----- tcg/ppc/tcg-target.c.inc | 28 +++++++++++++----------- tcg/riscv/tcg-target.c.inc | 1 + tcg/s390x/tcg-target.c.inc | 1 + tcg/sparc64/tcg-target.c.inc | 8 +++++-- 11 files changed, 67 insertions(+), 51 deletions(-) diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index a6e0cf1812..b757d37966 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -65,11 +65,7 @@ /* use a fully associative victim tlb of 8 entries */ #define CPU_VTLB_SIZE 8 -#if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32 -#define CPU_TLB_ENTRY_BITS 4 -#else #define CPU_TLB_ENTRY_BITS 5 -#endif #define CPU_TLB_DYN_MIN_BITS 6 #define CPU_TLB_DYN_DEFAULT_BITS 8 @@ -95,33 +91,26 @@ # endif /* Minimalized TLB entry for use by TCG fast path. */ -typedef struct CPUTLBEntry { - /* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address - bit TARGET_PAGE_BITS-1..4 : Nonzero for accesses that should not - go directly to ram. - bit 3 : indicates that the entry is invalid - bit 2..0 : zero - */ - union { - struct { - target_ulong addr_read; - target_ulong addr_write; - target_ulong addr_code; - /* Addend to virtual address to get host address. IO accesses - use the corresponding iotlb value. */ - uintptr_t addend; - }; +typedef union CPUTLBEntry { + struct { + uint64_t addr_read; + uint64_t addr_write; + uint64_t addr_code; /* - * Padding to get a power of two size, as well as index - * access to addr_{read,write,code}. + * Addend to virtual address to get host address. IO accesses + * use the corresponding iotlb value. */ - target_ulong addr_idx[(1 << CPU_TLB_ENTRY_BITS) / TARGET_LONG_SIZE]; + uintptr_t addend; }; + /* + * Padding to get a power of two size, as well as index + * access to addr_{read,write,code}. + */ + uint64_t addr_idx[(1 << CPU_TLB_ENTRY_BITS) / sizeof(uint64_t)]; } CPUTLBEntry; QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS)); - #endif /* !CONFIG_USER_ONLY && CONFIG_TCG */ #if !defined(CONFIG_USER_ONLY) diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index 7c867c94c3..f916a96a31 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -365,18 +365,25 @@ static inline target_ulong tlb_read_idx(const CPUTLBEntry *entry, { /* Do not rearrange the CPUTLBEntry structure members. */ QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) != - MMU_DATA_LOAD * TARGET_LONG_SIZE); + MMU_DATA_LOAD * sizeof(uint64_t)); QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_write) != - MMU_DATA_STORE * TARGET_LONG_SIZE); + MMU_DATA_STORE * sizeof(uint64_t)); QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_code) != - MMU_INST_FETCH * TARGET_LONG_SIZE); + MMU_INST_FETCH * sizeof(uint64_t)); - const target_ulong *ptr = &entry->addr_idx[access_type]; -#if TCG_OVERSIZED_GUEST - return *ptr; +#if TARGET_LONG_BITS == 32 + /* Use qatomic_read, in case of addr_write; only care about low bits. */ + const uint32_t *ptr = (uint32_t *)&entry->addr_idx[access_type]; + ptr += HOST_BIG_ENDIAN; + return qatomic_read(ptr); #else + const uint64_t *ptr = &entry->addr_idx[access_type]; +# if TCG_OVERSIZED_GUEST + return *ptr; +# else /* ofs might correspond to .addr_write, so use qatomic_read */ return qatomic_read(ptr); +# endif #endif } diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index e494404a0d..83297f9bff 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1000,11 +1000,15 @@ static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry, addr &= TARGET_PAGE_MASK; addr += tlb_entry->addend; if ((addr - start) < length) { -#if TCG_OVERSIZED_GUEST +#if TARGET_LONG_BITS == 32 + uint32_t *ptr_write = (uint32_t *)&tlb_entry->addr_write; + ptr_write += HOST_BIG_ENDIAN; + qatomic_set(ptr_write, *ptr_write | TLB_NOTDIRTY); +#elif TCG_OVERSIZED_GUEST tlb_entry->addr_write |= TLB_NOTDIRTY; #else qatomic_set(&tlb_entry->addr_write, - tlb_entry->addr_write | TLB_NOTDIRTY); + tlb_entry->addr_write | TLB_NOTDIRTY); #endif } } diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index fa8f3a7629..e94d3f2c2c 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1707,6 +1707,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0); /* Load the tlb comparator into X0, and the fast path addend into X1. */ + QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN); tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X1, is_ld ? offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write)); diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index b6e3a4c902..92fc5e35f5 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1417,6 +1417,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, * Add the tlb_table pointer, creating the CPUTLBEntry address in R1. * Load the tlb comparator into R2/R3 and the fast path addend into R1. */ + QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN); if (cmp_off == 0) { if (s->addr_type == TCG_TYPE_I32) { tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 04d1e0f24d..56f95980c2 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -877,6 +877,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); /* Load the tlb comparator and the addend. */ + QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN); tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, is_ld ? offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write)); diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 836ae58819..64e5bfc39e 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1201,14 +1201,17 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */ tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1); + if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) { + /* Load the (low half) tlb comparator. */ + tcg_out_ld(s, TCG_TYPE_I32, TCG_TMP0, TCG_TMP3, + cmp_off + HOST_BIG_ENDIAN * 4); + } else { + tcg_out_ld(s, TCG_TYPE_I64, TCG_TMP0, TCG_TMP3, cmp_off); + } + if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) { - /* Load the tlb comparator. */ - tcg_out_ld(s, addr_type, TCG_TMP0, TCG_TMP3, cmp_off); /* Load the tlb addend for the fast path. */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off); - } else { - /* Load the low half of the tlb comparator. */ - tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF); } /* diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index a81241e799..8dcde4ed14 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2107,20 +2107,24 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, } tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0)); - /* Load the (low part) TLB comparator into TMP2. */ - if (cmp_off == 0 - && (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32)) { - uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32 - ? LWZUX : LDUX); - tcg_out32(s, lxu | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2)); + /* + * Load the (low part) TLB comparator into TMP2. + * For 64-bit host, always load the entire 64-bit slot for simplicity. + * We will ignore the high bits with tcg_out_cmp(..., addr_type). + */ + if (TCG_TARGET_REG_BITS == 64) { + if (cmp_off == 0) { + tcg_out32(s, LDUX | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2)); + } else { + tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2)); + tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off); + } + } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) { + tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2)); } else { tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2)); - if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) { - tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, - TCG_REG_TMP1, cmp_off + 4 * HOST_BIG_ENDIAN); - } else { - tcg_out_ld(s, addr_type, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off); - } + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1, + cmp_off + 4 * HOST_BIG_ENDIAN); } /* diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 3f5437fa84..cddcfae1f9 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -963,6 +963,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase, } /* Load the tlb comparator and the addend. */ + QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN); tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, is_ld ? offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write)); diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 4898b01e6a..84ba6638d6 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1800,6 +1800,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, ofs = offsetof(CPUTLBEntry, addr_write); } if (addr_type == TCG_TYPE_I32) { + ofs += HOST_BIG_ENDIAN * 4; tcg_out_insn(s, RX, C, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs); } else { tcg_out_insn(s, RXY, CG, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs); diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index e00ecd2cf8..f169f41521 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1064,8 +1064,12 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, /* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */ tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T3, ARITH_ADD); - /* Load the tlb comparator and the addend. */ - tcg_out_ld(s, addr_type, TCG_REG_T2, TCG_REG_T1, cmp_off); + /* + * Load the tlb comparator and the addend. + * Always load the entire 64-bit comparator for simplicity. + * We will ignore the high bits via BPCC_ICC below. + */ + tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_T2, TCG_REG_T1, cmp_off); tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T1, TCG_REG_T1, add_off); h->base = TCG_REG_T1; -- 2.34.1