On 09/01/17 13:53, David Gibson wrote: > On Mon, Jan 09, 2017 at 01:38:26PM +1100, Alexey Kardashevskiy wrote: >> On 03/01/17 13:26, David Gibson wrote: >>> On Thu, Dec 22, 2016 at 12:13:12PM +1100, Alexey Kardashevskiy wrote: >>>> KVM_CAP_SPAPR_TCE capability allows creating TCE tables in KVM which >>>> allows having in-kernel acceleration for H_PUT_TCE_xxx hypercalls. >>>> However it only supports 32bit DMA windows at zero bus offset. >>>> >>>> There is a new KVM_CAP_SPAPR_TCE_64 capability which supports 64bit >>>> window size, variable page size and bus offset. >>>> >>>> This makes use of the new capability. The kernel headers are already >>>> updated as the kernel support went in to v4.6. >>>> >>>> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> >>>> --- >>>> target-ppc/kvm_ppc.h | 12 +++++++----- >>>> hw/ppc/spapr_iommu.c | 8 +++++--- >>>> target-ppc/kvm.c | 48 +++++++++++++++++++++++++++++++++++++----------- >>>> 3 files changed, 49 insertions(+), 19 deletions(-) >>>> >>>> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h >>>> index bd1d78bfbe..14320c2378 100644 >>>> --- a/target-ppc/kvm_ppc.h >>>> +++ b/target-ppc/kvm_ppc.h >>>> @@ -36,8 +36,9 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); >>>> #ifndef CONFIG_USER_ONLY >>>> off_t kvmppc_alloc_rma(void **rma); >>>> bool kvmppc_spapr_use_multitce(void); >>>> -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int >>>> *pfd, >>>> - bool need_vfio); >>>> +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, >>>> + uint64_t bus_offset, uint32_t nb_table, >>>> + int *pfd, bool need_vfio); >>>> int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size); >>>> int kvmppc_reset_htab(int shift_hint); >>>> uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift); >>>> @@ -168,9 +169,10 @@ static inline bool kvmppc_spapr_use_multitce(void) >>>> return false; >>>> } >>>> >>>> -static inline void *kvmppc_create_spapr_tce(uint32_t liobn, >>>> - uint32_t window_size, int *fd, >>>> - bool need_vfio) >>>> +static inline void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t >>>> page_shift, >>>> + uint64_t bus_offset, >>>> + uint32_t nb_table, >>>> + int *pfd, bool need_vfio) >>>> { >>>> return NULL; >>>> } >>>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c >>>> index ae30bbe30f..29c80bb3c8 100644 >>>> --- a/hw/ppc/spapr_iommu.c >>>> +++ b/hw/ppc/spapr_iommu.c >>>> @@ -79,15 +79,16 @@ static IOMMUAccessFlags >>>> spapr_tce_iommu_access_flags(uint64_t tce) >>>> >>>> static uint64_t *spapr_tce_alloc_table(uint32_t liobn, >>>> uint32_t page_shift, >>>> + uint64_t bus_offset, >>>> uint32_t nb_table, >>>> int *fd, >>>> bool need_vfio) >>>> { >>>> uint64_t *table = NULL; >>>> - uint64_t window_size = (uint64_t)nb_table << page_shift; >>>> >>>> - if (kvm_enabled() && !(window_size >> 32)) { >>>> - table = kvmppc_create_spapr_tce(liobn, window_size, fd, >>>> need_vfio); >>>> + if (kvm_enabled()) { >>> >>> This is broken. Previously, if we had a >4GiB window, we'd fall back >>> to managing it in userspace, which would work, albeit slowly. Now, if >>> you have an older kernel which doesn't support KVM_CAP_SPAPR_TCE_64 it >>> will attempt to allocate it in the kernel, and fail completely. >> >> >> No, kvmppc_create_spapr_tce() would return NULL and right after that there >> is a "if (!table)" (it can be seen at the end of this chunk) to handle the >> failure. > > Oh, yes, sorry. For some reason I thought there was a return in that > if block.
This was the only comment which turned out to be not a bug, what now? :) > >>> >>>> + table = kvmppc_create_spapr_tce(liobn, page_shift, bus_offset, >>>> nb_table, >>>> + fd, need_vfio); >>>> } >>>> >>>> if (!table) { >>>> @@ -342,6 +343,7 @@ void spapr_tce_table_enable(sPAPRTCETable *tcet, >>>> tcet->nb_table = nb_table; >>>> tcet->table = spapr_tce_alloc_table(tcet->liobn, >>>> tcet->page_shift, >>>> + tcet->bus_offset, >>>> tcet->nb_table, >>>> &tcet->fd, >>>> tcet->need_vfio); >>>> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c >>>> index 9c4834c4fc..6e91a4d8bb 100644 >>>> --- a/target-ppc/kvm.c >>>> +++ b/target-ppc/kvm.c >>>> @@ -71,6 +71,7 @@ static int cap_booke_sregs; >>>> static int cap_ppc_smt; >>>> static int cap_ppc_rma; >>>> static int cap_spapr_tce; >>>> +static int cap_spapr_tce_64; >>>> static int cap_spapr_multitce; >>>> static int cap_spapr_vfio; >>>> static int cap_hior; >>>> @@ -123,6 +124,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) >>>> cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT); >>>> cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); >>>> cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); >>>> + cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64); >>>> cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); >>>> cap_spapr_vfio = false; >>>> cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); >>>> @@ -2201,13 +2203,10 @@ bool kvmppc_spapr_use_multitce(void) >>>> return cap_spapr_multitce; >>>> } >>>> >>>> -void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int >>>> *pfd, >>>> - bool need_vfio) >>>> +void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, >>>> + uint64_t bus_offset, uint32_t nb_table, >>>> + int *pfd, bool need_vfio) >>>> { >>>> - struct kvm_create_spapr_tce args = { >>>> - .liobn = liobn, >>>> - .window_size = window_size, >>>> - }; >>>> long len; >>>> int fd; >>>> void *table; >>>> @@ -2220,14 +2219,41 @@ void *kvmppc_create_spapr_tce(uint32_t liobn, >>>> uint32_t window_size, int *pfd, >>>> return NULL; >>>> } >>>> >>>> - fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); >>>> - if (fd < 0) { >>>> - fprintf(stderr, "KVM: Failed to create TCE table for liobn >>>> 0x%x\n", >>>> - liobn); >>>> + if (cap_spapr_tce_64) { >>>> + struct kvm_create_spapr_tce_64 args = { >>>> + .liobn = liobn, >>>> + .page_shift = page_shift, >>>> + .offset = bus_offset >> page_shift, >>>> + .size = nb_table, >>>> + .flags = 0 >>>> + }; >>>> + fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args); >>>> + if (fd < 0) { >>>> + fprintf(stderr, >>>> + "KVM: Failed to create TCE64 table for liobn 0x%x\n", >>>> + liobn); >>>> + return NULL; >>>> + } >>>> + } else if (cap_spapr_tce) { >>>> + uint64_t window_size = (uint64_t) nb_table << page_shift; >>>> + struct kvm_create_spapr_tce args = { >>>> + .liobn = liobn, >>>> + .window_size = window_size, >>>> + }; >>>> + if ((window_size != args.window_size) || bus_offset) { >>>> + return NULL; >>>> + } >>>> + fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args); >>>> + if (fd < 0) { >>>> + fprintf(stderr, "KVM: Failed to create TCE table for liobn >>>> 0x%x\n", >>>> + liobn); >>>> + return NULL; >>>> + } >>>> + } else { >>>> return NULL; >>>> } >>>> >>>> - len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t); >>>> + len = nb_table * sizeof(uint64_t); >>>> /* FIXME: round this up to page size */ >>>> >>>> table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); >>> >> >> > > > > -- Alexey
signature.asc
Description: OpenPGP digital signature