On 08/16/2013 11:15 PM, Alexander Graf wrote: > > On 07.08.2013, at 10:08, Alexey Kardashevskiy wrote: > >> Currently only single TCE entry per requiest is supported (H_PUT_TCE). >> However PAPR+ specification allows multiple entry requests such as >> H_PUT_TCE_INDIRECT and H_STUFFF_TCE. Having less transitions to the host >> kernel via ioctls, support of these calls can accelerate IOMMU operations. >> >> This also removes some leftovers in debug output of the H_PUT_TCE handler. >> >> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> >> Signed-off-by: David Gibson <da...@gibson.dropbear.id.au> >> --- >> >> This patch requires a KVM_CAP_SPAPR_MULTITCE capability from kernel headers >> which is not there yet. >> However it still would be nice to have "Reviewed-by" from someone when >> the capability will make it to the upstream. Thanks. >> >> --- >> hw/ppc/spapr.c | 16 ++++++++++-- >> hw/ppc/spapr_iommu.c | 74 >> +++++++++++++++++++++++++++++++++++++++++++++++++--- >> target-ppc/kvm.c | 7 +++++ >> target-ppc/kvm_ppc.h | 7 +++++ >> 4 files changed, 99 insertions(+), 5 deletions(-) >> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c >> index 9494915..a6b1f54 100644 >> --- a/hw/ppc/spapr.c >> +++ b/hw/ppc/spapr.c >> @@ -301,6 +301,8 @@ static void *spapr_create_fdt_skel(const char *cpu_model, >> CPUState *cs; >> uint32_t start_prop = cpu_to_be32(initrd_base); >> uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); >> + char hypertas_propm[] = >> "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt" >> + "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk\0hcall-multi-tce"; >> char hypertas_prop[] = >> "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt" >> "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk"; >> char qemu_hypertas_prop[] = "hcall-memop1"; >> @@ -480,8 +482,18 @@ static void *spapr_create_fdt_skel(const char >> *cpu_model, >> /* RTAS */ >> _FDT((fdt_begin_node(fdt, "rtas"))); >> >> - _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop, >> - sizeof(hypertas_prop)))); >> + /* In TCG mode, the multitce functions, which we implement are a >> + * win. With KVM, we could fall back to the qemu implementation >> + * when KVM doesn't support them, but that would be much slower >> + * than just using the KVM implementations of the single TCE >> + * hypercalls. */ >> + if (kvmppc_spapr_use_multitce()) { >> + _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_propm, >> + sizeof(hypertas_propm)))); >> + } else { >> + _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop, >> + sizeof(hypertas_prop)))); >> + } >> _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas_prop, >> sizeof(qemu_hypertas_prop)))); >> >> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c >> index 3d4a1fc..22b09be 100644 >> --- a/hw/ppc/spapr_iommu.c >> +++ b/hw/ppc/spapr_iommu.c >> @@ -244,6 +244,71 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, >> target_ulong ioba, >> return H_SUCCESS; >> } >> >> +static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, >> + sPAPREnvironment *spapr, >> + target_ulong opcode, target_ulong >> *args) >> +{ >> + int i; >> + target_ulong liobn = args[0]; >> + target_ulong ioba = args[1]; >> + target_ulong tce_list = args[2]; >> + target_ulong npages = args[3]; >> + target_ulong ret = 0; >> + sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn); >> + >> + if (tcet) { >> + for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) { > > i++ > >> + target_ulong tce = ldq_phys((tce_list & ~SPAPR_TCE_PAGE_MASK) + > > I think it makes sense to do the masking when you assign the variable - makes > it easier to read. > >> + i * sizeof(target_ulong)); >> + ret = put_tce_emu(tcet, ioba, tce); >> + if (ret) { >> + break; >> + } >> + } >> + return ret; >> + } >> +#ifdef DEBUG_TCE >> + fprintf(stderr, "%s on liobn=" TARGET_FMT_lx > > Could you please convert this into something that doesn't bitrot? Either a > DPRINTF style macro that gets format checking done even when unused or a > trace point.
This file does not have DPRINTF and every time David or I tried to add one, we were loudly shouted not to do this. So - tracepoints. But the file uses #ifdef DEBUG_TCE heavily, and there is already a "bitrot" trace in H_PUT_TCE (the only hcall in the group which is already in the file). So what do I do now with traces? 2 patches - first converts everything to tracepoints and second patch which actually adds multi-tce? Or remove old "bitrot" traces from this patch and repost (I can survive without any debug code in upstream)? I am fine with both ways. >> + " ioba 0x" TARGET_FMT_lx " TCE 0x" TARGET_FMT_lx >> + " ret = " TARGET_FMT_ld "\n", >> + __func__, liobn, ioba, tce_list, ret); >> +#endif >> + >> + return H_PARAMETER; >> +} >> + >> +static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, > > Same comments as above in this function. > >> + target_ulong opcode, target_ulong *args) >> +{ >> + int i; >> + target_ulong liobn = args[0]; >> + target_ulong ioba = args[1]; >> + target_ulong tce_value = args[2]; >> + target_ulong npages = args[3]; >> + target_ulong ret = 0; >> + sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn); >> + >> + ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1); > > Heh - here you actually do the mask separately. This is good. > >> + >> + if (tcet) { >> + for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) { >> + ret = put_tce_emu(tcet, ioba, tce_value); >> + if (ret) { >> + break; >> + } >> + } >> + return ret; >> + } >> +#ifdef DEBUG_TCE >> + fprintf(stderr, "%s on liobn=" TARGET_FMT_lx >> + " ioba 0x" TARGET_FMT_lx " TCE 0x" TARGET_FMT_lx >> + " ret = " TARGET_FMT_ld "\n", >> + __func__, liobn, ioba, tce_value, ret); >> +#endif >> + >> + return H_PARAMETER; > > Hrm. These 2 functions look very similar. Does it make sense to merge them > into one with a bool indirect flag? It does not for me. Yes, they look pretty similar but they do opposite things AND args[2] has completely different meaning - value vs. address. I do not see how the function which accepts both tce_value and tce_list will still look easy/nice to read. I can change it though if you insist. The rest I'll fix, thanks for review. >> +} >> + >> static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr, >> target_ulong opcode, target_ulong *args) >> { >> @@ -258,9 +323,10 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, >> sPAPREnvironment *spapr, >> return put_tce_emu(tcet, ioba, tce); >> } >> #ifdef DEBUG_TCE >> - fprintf(stderr, "%s on liobn=" TARGET_FMT_lx /*%s*/ >> - " ioba 0x" TARGET_FMT_lx " TCE 0x" TARGET_FMT_lx "\n", >> - __func__, liobn, /*dev->qdev.id, */ioba, tce); >> + fprintf(stderr, "%s on liobn=" TARGET_FMT_lx >> + " ioba 0x" TARGET_FMT_lx " TCE 0x" TARGET_FMT_lx >> + " ret = " TARGET_FMT_ld "\n", >> + __func__, liobn, ioba, tce, ret); >> #endif >> >> return H_PARAMETER; >> @@ -318,6 +384,8 @@ static void spapr_tce_table_class_init(ObjectClass >> *klass, void *data) >> >> /* hcall-tce */ >> spapr_register_hypercall(H_PUT_TCE, h_put_tce); >> + spapr_register_hypercall(H_PUT_TCE_INDIRECT, h_put_tce_indirect); >> + spapr_register_hypercall(H_STUFF_TCE, h_stuff_tce); >> } >> >> static TypeInfo spapr_tce_table_info = { >> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c >> index 8afa7eb..97ac670 100644 >> --- a/target-ppc/kvm.c >> +++ b/target-ppc/kvm.c >> @@ -60,6 +60,7 @@ static int cap_booke_sregs; >> static int cap_ppc_smt; >> static int cap_ppc_rma; >> static int cap_spapr_tce; >> +static int cap_spapr_multitce; >> static int cap_hior; >> static int cap_one_reg; >> static int cap_epr; >> @@ -96,6 +97,7 @@ int kvm_arch_init(KVMState *s) >> cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT); >> cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA); >> cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE); >> + cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE); > > This isn't defined yet, no? Just make it "yes on TCG, no on KVM" for now > and add the multitce cap bit later when the kernel patches are in. > Rest looks good. > > > Alex > >> cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG); >> cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR); >> cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR); >> @@ -1603,6 +1605,11 @@ uint64_t kvmppc_rma_size(uint64_t current_size, >> unsigned int hash_shift) >> } >> #endif >> >> +bool kvmppc_spapr_use_multitce(void) >> +{ >> + return !kvm_enabled() || cap_spapr_multitce; >> +} >> + >> void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd) >> { >> struct kvm_create_spapr_tce args = { >> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h >> index 12564ef..a2a903f 100644 >> --- a/target-ppc/kvm_ppc.h >> +++ b/target-ppc/kvm_ppc.h >> @@ -31,6 +31,7 @@ int kvmppc_set_tcr(PowerPCCPU *cpu); >> int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); >> #ifndef CONFIG_USER_ONLY >> off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem); >> +bool kvmppc_spapr_use_multitce(void); >> void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int >> *pfd); >> int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size); >> int kvmppc_reset_htab(int shift_hint); >> @@ -125,6 +126,12 @@ static inline off_t kvmppc_alloc_rma(const char *name, >> MemoryRegion *sysmem) >> return 0; >> } >> >> +static inline bool kvmppc_spapr_use_multitce(void) >> +{ >> + /* No KVM, so always use the qemu multitce implementations */ >> + return true; >> +} >> + >> static inline void *kvmppc_create_spapr_tce(uint32_t liobn, >> uint32_t window_size, int *fd) >> { >> -- >> 1.8.3.2 >> > -- Alexey