Alexander Graf <ag...@suse.de> writes: > On 23.08.2013, at 06:20, Aneesh Kumar K.V wrote: > >> From: "Aneesh Kumar K.V" <aneesh.ku...@linux.vnet.ibm.com> >> >> With kvm enabled, we store the hash page table information in the hypervisor. >> Use ioctl to read the htab contents. Without this we get the below error when >> trying to read the guest address >> >> (gdb) x/10 do_fork >> 0xc000000000098660 <do_fork>: Cannot access memory at address >> 0xc000000000098660 >> (gdb) >> >> Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com> >> --- >> target-ppc/kvm.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ >> target-ppc/kvm_ppc.h | 9 ++++++++- >> target-ppc/mmu-hash64.c | 25 ++++++++++++++++--------- >> 3 files changed, 69 insertions(+), 10 deletions(-) >> >> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c >> index 6878af2..bcc6544 100644 >> --- a/target-ppc/kvm.c >> +++ b/target-ppc/kvm.c >> @@ -1891,3 +1891,48 @@ int kvm_arch_on_sigbus(int code, void *addr) >> void kvm_arch_init_irq_routing(KVMState *s) >> { >> } >> + >> +int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index, >> + target_ulong *hpte0, target_ulong *hpte1) >> +{ >> + int htab_fd; >> + struct kvm_get_htab_fd ghf; >> + struct kvm_get_htab_buf { >> + struct kvm_get_htab_header header; >> + /* >> + * Older kernel required one extra byte. >> + */ >> + unsigned long hpte[3]; >> + } hpte_buf; >> + >> + *hpte0 = 0; >> + *hpte1 = 0; >> + if (!cap_htab_fd) { >> + return 0; >> + } >> + /* >> + * At this point we are only interested in reading only bolted entries >> + */ >> + ghf.flags = KVM_GET_HTAB_BOLTED_ONLY; >> + ghf.start_index = index; >> + htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); > > We should cache this.
The fd returned by KVM_PPC_GET_HTAB_FD doesn't support a proper lseek interface. i.e., we cannot seek around to read the hpte entries at index. The call paths are also not in the hot path. Hence I didn't look at caching. We could definitely avoid doing the ioctl in a loop. See below for the changes. > >> + if (htab_fd < 0) { >> + return htab_fd; >> + } >> + >> + if (read(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) { >> + goto out; >> + } >> + /* >> + * We only requested for one entry, So we should get only 1 >> + * valid entry at the same index >> + */ >> + if (hpte_buf.header.n_valid != 1 || hpte_buf.header.index != index) { >> + goto out; >> + } >> + *hpte0 = hpte_buf.hpte[0]; >> + *hpte1 = hpte_buf.hpte[1]; >> +out: >> + close(htab_fd); >> + return 0; >> +} >> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h >> index 4ae7bf2..e25373a 100644 >> --- a/target-ppc/kvm_ppc.h >> +++ b/target-ppc/kvm_ppc.h >> @@ -42,7 +42,8 @@ int kvmppc_get_htab_fd(bool write); >> int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns); >> int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, >> uint16_t n_valid, uint16_t n_invalid); >> - >> +int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index, >> + target_ulong *hpte0, target_ulong *hpte1); >> #else >> >> static inline uint32_t kvmppc_get_tbfreq(void) >> @@ -181,6 +182,12 @@ static inline int kvmppc_load_htab_chunk(QEMUFile *f, >> int fd, uint32_t index, >> abort(); >> } >> >> +static inline int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index, >> + target_ulong *hpte0, >> + target_ulong *hpte1) >> +{ >> + abort(); >> +} >> #endif >> >> #ifndef CONFIG_KVM >> diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c >> index 67fc1b5..4d8120c 100644 >> --- a/target-ppc/mmu-hash64.c >> +++ b/target-ppc/mmu-hash64.c >> @@ -302,17 +302,26 @@ static int ppc_hash64_amr_prot(CPUPPCState *env, >> ppc_hash_pte64_t pte) >> return prot; >> } >> >> -static hwaddr 
ppc_hash64_pteg_search(CPUPPCState *env, hwaddr pteg_off, >> +static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr hash, >> bool secondary, target_ulong ptem, >> ppc_hash_pte64_t *pte) >> { >> - hwaddr pte_offset = pteg_off; >> + uint64_t index; >> + hwaddr pte_offset; >> target_ulong pte0, pte1; >> int i; >> >> + pte_offset = (hash * HASH_PTEG_SIZE_64) & env->htab_mask;; >> + index = (hash * HPTES_PER_GROUP) & env->htab_mask; >> + >> for (i = 0; i < HPTES_PER_GROUP; i++) { >> - pte0 = ppc_hash64_load_hpte0(env, pte_offset); >> - pte1 = ppc_hash64_load_hpte1(env, pte_offset); >> + if (kvm_enabled()) { >> + index += i; >> + kvmppc_hash64_load_hpte(ppc_env_get_cpu(env), index, &pte0, >> &pte1); > > This breaks PR KVM which doesn't have an HTAB fd. > > I think what you want is code in kvmppc_set_papr() that tries to fetch > an HTAB fd. You can then modify the check to if (kvm_enabled() && > kvmppc_has_htab_fd()), as the below case should work just fine on PR > KVM. As explained before caching htab fd may not really work. How about diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index fce8835..cf6aca4 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -1892,19 +1892,24 @@ void kvm_arch_init_irq_routing(KVMState *s) { } -int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index, - target_ulong *hpte0, target_ulong *hpte1) +hwaddr kvmppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash, + bool secondary, target_ulong ptem, + target_ulong *hpte0, target_ulong *hpte1) { int htab_fd; + uint64_t index; + hwaddr pte_offset; + target_ulong pte0, pte1; struct kvm_get_htab_fd ghf; struct kvm_get_htab_buf { struct kvm_get_htab_header header; /* * Older kernel required one extra byte. 
*/ - unsigned long hpte[3]; + unsigned long hpte[(HPTES_PER_GROUP * 2) + 1]; } hpte_buf; + index = (hash * HPTES_PER_GROUP) & cpu->env.htab_mask; *hpte0 = 0; *hpte1 = 0; if (!cap_htab_fd) { @@ -1917,22 +1922,33 @@ int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index, ghf.start_index = index; htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf); if (htab_fd < 0) { - return htab_fd; - } - - if (read(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) { - goto out; + goto error_out; } /* - * We only requested for one entry, So we should get only 1 - * valid entry at the same index + * Read the hpte group */ - if (hpte_buf.header.n_valid != 1 || hpte_buf.header.index != index) { + if (read(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) { goto out; } - *hpte0 = hpte_buf.hpte[0]; - *hpte1 = hpte_buf.hpte[1]; + + index = 0; + pte_offset = (hash * HASH_PTEG_SIZE_64) & cpu->env.htab_mask;; + while (index < hpte_buf.header.n_valid) { + pte0 = hpte_buf.hpte[(index * 2)]; + pte1 = hpte_buf.hpte[(index * 2) + 1]; + if ((pte0 & HPTE64_V_VALID) + && (secondary == !!(pte0 & HPTE64_V_SECONDARY)) + && HPTE64_V_COMPARE(pte0, ptem)) { + *hpte0 = pte0; + *hpte1 = pte1; + close(htab_fd); + return pte_offset; + } + index++; + pte_offset += HASH_PTE_SIZE_64; + } out: close(htab_fd); - return 0; +error_out: + return -1; } diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h index e25373a..dad0e57 100644 --- a/target-ppc/kvm_ppc.h +++ b/target-ppc/kvm_ppc.h @@ -42,8 +42,9 @@ int kvmppc_get_htab_fd(bool write); int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns); int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, uint16_t n_valid, uint16_t n_invalid); -int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index, - target_ulong *hpte0, target_ulong *hpte1); +hwaddr kvmppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash, + bool secondary, target_ulong ptem, + target_ulong *hpte0, target_ulong *hpte1); #else static inline uint32_t kvmppc_get_tbfreq(void) 
@@ -182,9 +183,11 @@ static inline int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index, abort(); } -static inline int kvmppc_hash64_load_hpte(PowerPCCPU *cpu, uint64_t index, - target_ulong *hpte0, - target_ulong *hpte1) +static inline hwaddr kvmppc_hash64_pteg_search(PowerPCCPU *cpu, hwaddr hash, + bool secondary, + target_ulong ptem, + target_ulong *hpte0, + target_ulong *hpte1) { abort(); } diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c index 4d8120c..2288fe8 100644 --- a/target-ppc/mmu-hash64.c +++ b/target-ppc/mmu-hash64.c @@ -306,35 +306,39 @@ static hwaddr ppc_hash64_pteg_search(CPUPPCState *env, hwaddr hash, bool secondary, target_ulong ptem, ppc_hash_pte64_t *pte) { - uint64_t index; hwaddr pte_offset; target_ulong pte0, pte1; - int i; - - pte_offset = (hash * HASH_PTEG_SIZE_64) & env->htab_mask;; - index = (hash * HPTES_PER_GROUP) & env->htab_mask; - - for (i = 0; i < HPTES_PER_GROUP; i++) { - if (kvm_enabled()) { - index += i; - kvmppc_hash64_load_hpte(ppc_env_get_cpu(env), index, &pte0, &pte1); - } else { + int i, ret = 0; + + if (kvm_enabled()) { + ret = kvmppc_hash64_pteg_search(ppc_env_get_cpu(env), hash, + secondary, ptem, + &pte->pte0, &pte->pte1); + } + /* + * We don't support htab fd, check whether we have a copy of htab + */ + if (!ret) { + pte_offset = (hash * HASH_PTEG_SIZE_64) & env->htab_mask;; + for (i = 0; i < HPTES_PER_GROUP; i++) { pte0 = ppc_hash64_load_hpte0(env, pte_offset); pte1 = ppc_hash64_load_hpte1(env, pte_offset); - } - if ((pte0 & HPTE64_V_VALID) - && (secondary == !!(pte0 & HPTE64_V_SECONDARY)) - && HPTE64_V_COMPARE(pte0, ptem)) { - pte->pte0 = pte0; - pte->pte1 = pte1; - return pte_offset; + if ((pte0 & HPTE64_V_VALID) + && (secondary == !!(pte0 & HPTE64_V_SECONDARY)) + && HPTE64_V_COMPARE(pte0, ptem)) { + pte->pte0 = pte0; + pte->pte1 = pte1; + return pte_offset; + } + pte_offset += HASH_PTE_SIZE_64; } - - pte_offset += HASH_PTE_SIZE_64; + /* + * We didn't find a valid entry. 
+ */ + ret = -1; } - - return -1; + return ret; } static hwaddr ppc_hash64_htab_lookup(CPUPPCState *env, -aneesh