Hi, Richard: On 11/11/2021 11:53 PM, Richard Henderson wrote: > On 11/11/21 2:35 AM, Xiaojuan Yang wrote: >> This patch introduces basic TLB interfaces. >> >> Signed-off-by: Xiaojuan Yang <yangxiaoj...@loongson.cn> >> Signed-off-by: Song Gao <gaos...@loongson.cn> >> --- >> target/loongarch/cpu-param.h | 3 + >> target/loongarch/cpu.c | 36 ++++ >> target/loongarch/cpu.h | 57 ++++++ >> target/loongarch/internals.h | 7 + >> target/loongarch/machine.c | 56 ++++++ >> target/loongarch/meson.build | 1 + >> target/loongarch/tlb_helper.c | 339 ++++++++++++++++++++++++++++++++++ >> 7 files changed, 499 insertions(+) >> create mode 100644 target/loongarch/tlb_helper.c >> >> diff --git a/target/loongarch/cpu-param.h b/target/loongarch/cpu-param.h >> index 9a769b67e0..5a2147fb90 100644 >> --- a/target/loongarch/cpu-param.h >> +++ b/target/loongarch/cpu-param.h >> @@ -12,6 +12,9 @@ >> #define TARGET_PHYS_ADDR_SPACE_BITS 48 >> #define TARGET_VIRT_ADDR_SPACE_BITS 48 >> +#define TARGET_PHYS_MASK ((1UL << TARGET_PHYS_ADDR_SPACE_BITS) - 1) >> +#define TARGET_VIRT_MASK ((1UL << TARGET_VIRT_ADDR_SPACE_BITS) - 1) > > As before, unsigned long is wrong; use MAKE_64BIT_MASK. > > These do not belong in cpu-param.h anyway; probably only tlb_helper.c needs > them. > >> +#ifndef CONFIG_USER_ONLY >> + qemu_fprintf(f, "EUEN 0x%lx\n", env->CSR_EUEN); >> + qemu_fprintf(f, "ESTAT 0x%lx\n", env->CSR_ESTAT); >> + qemu_fprintf(f, "ERA 0x%lx\n", env->CSR_ERA); >> + qemu_fprintf(f, "CRMD 0x%lx\n", env->CSR_CRMD); >> + qemu_fprintf(f, "PRMD 0x%lx\n", env->CSR_PRMD); >> + qemu_fprintf(f, "BadVAddr 0x%lx\n", env->CSR_BADV); >> + qemu_fprintf(f, "TLB refill ERA 0x%lx\n", env->CSR_TLBRERA); >> + qemu_fprintf(f, "TLB refill BadV 0x%lx\n", env->CSR_TLBRBADV); >> + qemu_fprintf(f, "EENTRY 0x%lx\n", env->CSR_EENTRY); >> + qemu_fprintf(f, "BadInstr 0x%lx\n", env->CSR_BADI); >> + qemu_fprintf(f, "PRCFG1 0x%lx\nPRCFG2 0x%lx\nPRCFG3 0x%lx\n", >> + env->CSR_PRCFG1, env->CSR_PRCFG3, env->CSR_PRCFG3); > > %lx is wrong; use PRIx64. > >> +#define LOONGARCH_TLB_MAX 2112 /* 2048 STLB + 64 MTLB */ > > Better to write (2048 + 64). > >> +FIELD(TLB_MISC, E, 0, 1) >> +FIELD(TLB_MISC, ASID, 1, 10) >> +FIELD(TLB_MISC, G, 11, 1) >> +FIELD(TLB_MISC, PS, 12, 6) >> +FIELD(TLB_MISC, VPPN, 18, 35) >> + >> +/* Corresponding to CSR_TLBELO0/1 */ >> +FIELD(ENTRY0, V, 0, 1) >> +FIELD(ENTRY0, D, 1, 1) >> +FIELD(ENTRY0, NR, 2, 1) >> +FIELD(ENTRY0, NX, 3, 1) >> +FIELD(ENTRY0, MAT, 4, 2) >> +FIELD(ENTRY0, PLV, 6, 2) >> +FIELD(ENTRY0, RPLV, 8, 1) >> +FIELD(ENTRY0, PPN, 9, 36) >> + >> +FIELD(ENTRY1, V, 0, 1) >> +FIELD(ENTRY1, D, 1, 1) >> +FIELD(ENTRY1, NR, 2, 1) >> +FIELD(ENTRY1, NX, 3, 1) >> +FIELD(ENTRY1, MAT, 4, 2) >> +FIELD(ENTRY1, PLV, 6, 2) >> +FIELD(ENTRY1, RPLV, 8, 1) >> +FIELD(ENTRY1, PPN, 9, 36) > > Why are you duplicating the CSR_TLBELO* fields? > >> +const VMStateInfo vmstate_info_tlb = { >> + .name = "tlb_entry", >> + .get = get_tlb, >> + .put = put_tlb, >> +}; > > Better to use .fields. > >> +#define VMSTATE_TLB_ARRAY_V(_f, _s, _n, _v) \ >> + VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_tlb, loongarch_tlb) >> + >> +#define VMSTATE_TLB_ARRAY(_f, _s, _n) \ >> + VMSTATE_TLB_ARRAY_V(_f, _s, _n, 0) > > Don't need these. > >> + >> +const VMStateDescription vmstate_tlb = { >> + .name = "cpu/tlb", >> + .version_id = 0, >> + .minimum_version_id = 0, >> + .fields = (VMStateField[]) { >> + VMSTATE_TLB_ARRAY(env.tlb, LoongArchCPU, LOONGARCH_TLB_MAX), > > VMSTATE_STRUCT_ARRAY. > >> + VMSTATE_END_OF_LIST() >> + } >> +}; >> /* LoongArch CPU state */ >> @@ -22,6 +70,10 @@ const VMStateDescription vmstate_loongarch_cpu = { >> VMSTATE_UINT64_ARRAY(env.fpr, LoongArchCPU, 32), >> VMSTATE_UINT32(env.fcsr0, LoongArchCPU), >> + /* TLB */ >> + VMSTATE_UINT32(env.stlb_size, LoongArchCPU), >> + VMSTATE_UINT32(env.mtlb_size, LoongArchCPU), > > Might as well keep these in vmstate_tlb.
All of the vmstate has been modified. >> +/* TLB address map */ >> +static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, >> + int *prot, target_ulong address, >> + int access_type, loongarch_tlb *tlb) >> +{ >> + uint64_t plv = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PLV); > > Incorrect. PLV associated with mmu_idx, so you need to use that. > >> + uint8_t tlb_ps, n, tlb_v0, tlb_v1, tlb_d0, tlb_d1; >> + uint8_t tlb_nx0, tlb_nx1, tlb_nr0, tlb_nr1; >> + uint64_t tlb_ppn0, tlb_ppn1; >> + uint8_t tlb_rplv0, tlb_rplv1, tlb_plv0, tlb_plv1; >> + >> + tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); >> + n = (address >> tlb_ps) & 0x1;/* Odd or even */ > > Surely you need to pass in tlb_ps, since it's not present in the STLB entries. > >> + >> + tlb_v0 = FIELD_EX64(tlb->tlb_entry0, ENTRY0, V); >> + tlb_d0 = FIELD_EX64(tlb->tlb_entry0, ENTRY0, D); >> + tlb_plv0 = FIELD_EX64(tlb->tlb_entry0, ENTRY0, PLV); >> + tlb_ppn0 = FIELD_EX64(tlb->tlb_entry0, ENTRY0, PPN); >> + tlb_nx0 = FIELD_EX64(tlb->tlb_entry0, ENTRY0, NX); >> + tlb_nr0 = FIELD_EX64(tlb->tlb_entry0, ENTRY0, NR); >> + tlb_rplv0 = FIELD_EX64(tlb->tlb_entry0, ENTRY0, RPLV); >> + >> + tlb_v1 = FIELD_EX64(tlb->tlb_entry1, ENTRY1, V); >> + tlb_d1 = FIELD_EX64(tlb->tlb_entry1, ENTRY1, D); >> + tlb_plv1 = FIELD_EX64(tlb->tlb_entry1, ENTRY1, PLV); >> + tlb_ppn1 = FIELD_EX64(tlb->tlb_entry1, ENTRY1, PPN); >> + tlb_nx1 = FIELD_EX64(tlb->tlb_entry1, ENTRY1, NX); >> + tlb_nr1 = FIELD_EX64(tlb->tlb_entry1, ENTRY1, NR); >> + tlb_rplv1 = FIELD_EX64(tlb->tlb_entry1, ENTRY1, RPLV); > > Better to check N first. > > entry = n ? tlb->tlb_entry1 : tlb->tlb_entry0; > v = FIELD_EX64(entry, TLBENTRY, V); > > etc. oh, yes , it is Much simpler when check N first. Your advice is always so appropriate. Thanks > >> + *physical = (tlb_ppn1 << 12) | (address & ((1 << tlb_ps) - 1)); > > TARGET_PAGE_SIZE. Maybe TARGET_PAGE_SIZE is not fit for a huge page. MAKE_64BIT_MASK(0, tlb_ps) is ok? > >> +/* LoongArch 3A5000 -style MMU emulation */ > > There's no mention of 3A5000 in "LoongArch Reference Manual", which defines > this. I think you've copied this from MIPS, which talks about r4k here. > >> +static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, >> + int *prot, >> + target_ulong address, >> + MMUAccessType access_type) >> +{ >> + loongarch_tlb *tlb; >> + uint16_t csr_asid, tlb_asid, stlb_idx; >> + uint8_t tlb_e, stlb_ps, tlb_ps, tlb_g; >> + int i, stlb_size, mtlb_size; >> + uint64_t vpn, tlb_vppn; /* Address to map */ >> + >> + stlb_size = env->stlb_size; >> + mtlb_size = env->mtlb_size; >> + csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); >> + >> + /* Search MTLB */ >> + for (i = stlb_size; i < stlb_size + mtlb_size; ++i) { > > Bit of a shame to have a linear search. I guess it's ok for a start, but > you'll find that this function is critical to the emulation speed of qemu, so > you should think about other ways to organize the data. The stlb search by the set, the mtlb uses the linear search, I have no other idea, do you have some advice? > >> + tlb = &env->tlb[i]; >> + tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); >> + tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); >> + >> + vpn = (address & TARGET_VIRT_MASK) >> (tlb_ps + 1); > > Why +1? One tlb entry holds a adjacent odd/even pair, the vpn is the content of the virtual page number divided by 2. So the compare vpn is bit[47:15] for 16KB page. while the vppn field in tlb entry contains bit[47:13], so need adjust. > >> + tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); >> + tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); >> + tlb_g = FIELD_EX64(tlb->tlb_misc, TLB_MISC, G); >> + >> + if ((tlb_g == 1 || tlb_asid == csr_asid) && >> + (vpn == (tlb_vppn >> (tlb_ps + 1 - 13))) && tlb_e) { > > Surely extract and test the enable bit E before anything else, just to speed > up the lookup. > > The -13 is a bit of a magic number. Surely TARGET_PAGE_BITS + 1. OK, I will use the R_TLB_MISC_VPPN_SHIFT replace the magic 13. > >> +static int get_physical_address(CPULoongArchState *env, hwaddr *physical, >> + int *prot, target_ulong real_address, >> + MMUAccessType access_type, int mmu_idx) >> +{ >> + int user_mode = mmu_idx == LOONGARCH_HFLAG_UM; >> + int kernel_mode = !user_mode; > > Incorrect. PLV == mmu_idx, as defined by cpu_mmu_index. > >> + unsigned plv, base_c, base_v, tmp; >> + uint64_t pg = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG); >> + >> + /* Effective address */ >> + target_ulong address = real_address; >> + >> + /* Check PG */ >> + if (!pg) { >> + /* DA mode */ >> + *physical = address & TARGET_PHYS_MASK; >> + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; >> + return TLBRET_MATCH; >> + } > > You need to define a fifth mmu_index for paging disabled, because the > software tlb handler runs in this mode. If you don't define that, you'll > have to flush the softmmu tlb every time you have a tlb miss. > >> + plv = kernel_mode | (user_mode << 3); > > plv_mask = 1 << plv; > >> + base_v = address >> CSR_DMW_BASE_SH; >> + /* Check direct map window 0 */ >> + base_c = env->CSR_DMWIN0 >> CSR_DMW_BASE_SH; >> + if ((plv & env->CSR_DMWIN0) && (base_c == base_v)) { >> + *physical = dmwin_va2pa(address); >> + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; >> + return TLBRET_MATCH; >> + } >> + /* Check direct map window 1 */ >> + base_c = env->CSR_DMWIN1 >> CSR_DMW_BASE_SH; >> + if ((plv & env->CSR_DMWIN1) && (base_c == base_v)) { >> + *physical = dmwin_va2pa(address); >> + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; >> + return TLBRET_MATCH; >> + } > > There are more than 2 of these, which is why I suggested putting them in an > array, so that you can loop. Maybe pull this out to a separate function, > like the tlb lookup? > >> + /* Check valid extension */ >> + tmp = address >> (TARGET_VIRT_ADDR_SPACE_BITS - 1); >> + if (!(tmp == 0 || tmp == 0x1ffff)) { > > Better to cast to int64_t first, so that this becomes 0 or -1, without > knowing that 64 - TARGET_VIRT_ADDR_SPACE_BITS - 1 -> 0x1ffff. > >> +void loongarch_mmu_init(CPULoongArchState *env) >> +{ >> + /* Number of MTLB */ >> + env->mtlb_size = 64; >> + >> + /* Number of STLB */ >> + env->stlb_size = 2048; >> + >> + /* For 16KB, ps = 14, compare the bit [47:15] */ >> + for (int i = 0; i < LOONGARCH_TLB_MAX; i++) { >> + env->tlb[i].tlb_misc = FIELD_DP64(env->tlb[i].tlb_misc, TLB_MISC, >> E, 0); >> + } > > Just memset the whole thing? yes, the tlb is empty at the start. > >> +bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size, >> + MMUAccessType access_type, int mmu_idx, >> + bool probe, uintptr_t retaddr) >> +{ >> + LoongArchCPU *cpu = LOONGARCH_CPU(cs); >> + CPULoongArchState *env = &cpu->env; >> + hwaddr physical; >> + int prot; >> + int ret = TLBRET_BADADDR; >> + >> + /* Data access */ >> + /* XXX: put correct access by using cpu_restore_state() correctly */ >> + ret = get_physical_address(env, &physical, &prot, address, >> + access_type, mmu_idx); >> + switch (ret) { >> + case TLBRET_MATCH: >> + qemu_log_mask(CPU_LOG_MMU, >> + "%s address=%" VADDR_PRIx " physical " TARGET_FMT_plx >> + " prot %d\n", __func__, address, physical, prot); >> + break; >> + default: >> + qemu_log_mask(CPU_LOG_MMU, >> + "%s address=%" VADDR_PRIx " ret %d\n", __func__, >> address, >> + ret); >> + break; >> + } >> + if (ret == TLBRET_MATCH) { >> + tlb_set_page(cs, address & TARGET_PAGE_MASK, >> + physical & TARGET_PAGE_MASK, prot, >> + mmu_idx, TARGET_PAGE_SIZE); >> + return true; > > Merge the above switch into this? > >> + } >> + if (probe) { >> + return false; >> + } else { >> + raise_mmu_exception(env, address, access_type, ret); >> + do_raise_exception(env, cs->exception_index, retaddr); >> + } >> +} >> > > > r~