On Wed, Dec 16, 2020 at 02:24:46PM +0530, Bharata B Rao wrote:
> Implement H_RPT_INVALIDATE hcall and add KVM capability
> KVM_CAP_PPC_RPT_INVALIDATE to indicate the support for the same.
> 
> This hcall does two types of TLB invalidations:
> 
> 1. Process-scoped invalidations for guests with LPCR[GTSE]=0.
>    This is currently not used in KVM as GTSE is not usually
>    disabled in KVM.
> 2. Partition-scoped invalidations that an L1 hypervisor does on
>    behalf of an L2 guest. This replaces the uses of the existing
>    hcall H_TLB_INVALIDATE.
> 
> Signed-off-by: Bharata B Rao <bhar...@linux.ibm.com>
> ---
>  Documentation/virt/kvm/api.rst                |  17 +++
>  .../include/asm/book3s/64/tlbflush-radix.h    |  18 +++
>  arch/powerpc/include/asm/kvm_book3s.h         |   3 +
>  arch/powerpc/kvm/book3s_hv.c                  | 121 ++++++++++++++++++
>  arch/powerpc/kvm/book3s_hv_nested.c           |  94 ++++++++++++++
>  arch/powerpc/kvm/powerpc.c                    |   3 +
>  arch/powerpc/mm/book3s64/radix_tlb.c          |   4 -
>  include/uapi/linux/kvm.h                      |   1 +
>  8 files changed, 257 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index e00a66d72372..5ce237c0d707 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -6014,6 +6014,23 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit 
> notifications which user space
>  can then handle to implement model specific MSR handling and/or user 
> notifications
>  to inform a user that an MSR was not handled.
>  
> +7.22 KVM_CAP_PPC_RPT_INVALIDATE
> +------------------------------
> +
> +:Capability: KVM_CAP_PPC_RPT_INVALIDATE
> +:Architectures: ppc
> +:Type: vm
> +
> +This capability indicates that the kernel is capable of handling
> +H_RPT_INVALIDATE hcall.
> +
> +In order to enable the use of H_RPT_INVALIDATE in the guest,
> +user space might have to advertise it for the guest. For example,
> +IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
> +present in the "ibm,hypertas-functions" device-tree property.
> +
> +This capability is always enabled.
> +
>  8. Other capabilities.
>  ======================
>  
> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h 
> b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> index 94439e0cefc9..aace7e9b2397 100644
> --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> @@ -4,6 +4,10 @@
>  
>  #include <asm/hvcall.h>
>  
> +#define RIC_FLUSH_TLB 0
> +#define RIC_FLUSH_PWC 1
> +#define RIC_FLUSH_ALL 2
> +
>  struct vm_area_struct;
>  struct mm_struct;
>  struct mmu_gather;
> @@ -21,6 +25,20 @@ static inline u64 psize_to_rpti_pgsize(unsigned long psize)
>       return H_RPTI_PAGE_ALL;
>  }
>  
> +static inline int rpti_pgsize_to_psize(unsigned long page_size)
> +{
> +     if (page_size == H_RPTI_PAGE_4K)
> +             return MMU_PAGE_4K;
> +     if (page_size == H_RPTI_PAGE_64K)
> +             return MMU_PAGE_64K;
> +     if (page_size == H_RPTI_PAGE_2M)
> +             return MMU_PAGE_2M;
> +     if (page_size == H_RPTI_PAGE_1G)
> +             return MMU_PAGE_1G;
> +     else
> +             return MMU_PAGE_64K; /* Default */
> +}
> +
>  static inline int mmu_get_ap(int psize)
>  {
>       return mmu_psize_defs[psize].ap;
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
> b/arch/powerpc/include/asm/kvm_book3s.h
> index d32ec9ae73bd..0f1c5fa6e8ce 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -298,6 +298,9 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 
> dw1);
>  void kvmhv_release_all_nested(struct kvm *kvm);
>  long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
>  long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
> +long kvmhv_h_rpti_nested(struct kvm_vcpu *vcpu, unsigned long lpid,
> +                      unsigned long type, unsigned long pg_sizes,
> +                      unsigned long start, unsigned long end);
>  int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
>                         u64 time_limit, unsigned long lpcr);
>  void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index e3b1839fc251..adf2d1191581 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -904,6 +904,118 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
>       return yield_count;
>  }
>  
> +static inline void do_tlb_invalidate_all(unsigned long rb, unsigned long rs)
> +{
> +     asm volatile("ptesync" : : : "memory");
> +     asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
> +             : : "r"(rb), "i"(1), "i"(1), "i"(RIC_FLUSH_ALL), "r"(rs)
> +             : "memory");
> +     asm volatile("eieio; tlbsync; ptesync" : : : "memory");
> +}
> +
> +static inline void do_tlb_invalidate_pwc(unsigned long rb, unsigned long rs)
> +{
> +     asm volatile("ptesync" : : : "memory");
> +     asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
> +             : : "r"(rb), "i"(1), "i"(1), "i"(RIC_FLUSH_PWC), "r"(rs)
> +             : "memory");
> +     asm volatile("eieio; tlbsync; ptesync" : : : "memory");
> +}
> +
> +static inline void do_tlb_invalidate_tlb(unsigned long rb, unsigned long rs)
> +{
> +     asm volatile("ptesync" : : : "memory");
> +     asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
> +             : : "r"(rb), "i"(1), "i"(1), "i"(RIC_FLUSH_TLB), "r"(rs)
> +             : "memory");
> +     asm volatile("eieio; tlbsync; ptesync" : : : "memory");
> +}
> +
> +static void do_tlb_invalidate(unsigned long rs, unsigned long target,
> +                           unsigned long type, unsigned long page_size,
> +                           unsigned long ap, unsigned long start,
> +                           unsigned long end)
> +{
> +     unsigned long rb;
> +     unsigned long addr = start;
> +
> +     if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
> +             rb = PPC_BIT(53); /* IS = 1 */
> +             do_tlb_invalidate_all(rb, rs);
> +             return;
> +     }
> +
> +     if (type & H_RPTI_TYPE_PWC) {
> +             rb = PPC_BIT(53); /* IS = 1 */
> +             do_tlb_invalidate_pwc(rb, rs);
> +     }
> +
> +     if (!addr && end == -1) { /* PID */
> +             rb = PPC_BIT(53); /* IS = 1 */
> +             do_tlb_invalidate_tlb(rb, rs);
> +     } else { /* EA */
> +             do {
> +                     rb = addr & ~(PPC_BITMASK(52, 63));
> +                     rb |= ap << PPC_BITLSHIFT(58);
> +                     do_tlb_invalidate_tlb(rb, rs);
> +                     addr += page_size;
> +             } while (addr < end);
> +     }
> +}
> +
> +static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
> +                                 unsigned long pid, unsigned long target,
> +                                 unsigned long type, unsigned long pg_sizes,
> +                                 unsigned long start, unsigned long end)
> +{
> +     unsigned long rs, ap, psize;
> +
> +     if (!kvm_is_radix(vcpu->kvm))
> +             return H_FUNCTION;

IIUC The cover note said this case was H_NOT_SUPPORTED, rather than H_FUNCTION.

> +
> +     if (end < start)
> +             return H_P5;
> +
> +     if (type & H_RPTI_TYPE_NESTED) {
> +             if (!nesting_enabled(vcpu->kvm))
> +                     return H_FUNCTION;

Likewise, I'm not sure that H_FUNCTION is the right choice here.

> +
> +             /* Support only cores as target */
> +             if (target != H_RPTI_TARGET_CMMU)
> +                     return H_P2;
> +
> +             return kvmhv_h_rpti_nested(vcpu, pid,
> +                                        (type & ~H_RPTI_TYPE_NESTED),
> +                                         pg_sizes, start, end);
> +     }
> +
> +     rs = pid << PPC_BITLSHIFT(31);
> +     rs |= vcpu->kvm->arch.lpid;
> +
> +     if (pg_sizes & H_RPTI_PAGE_64K) {
> +             psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_64K);
> +             ap = mmu_get_ap(psize);
> +             do_tlb_invalidate(rs, target, type, (1UL << 16), ap, start,
> +                               end);

Should these be conditional on the TLB flag in type?

> +     }
> +
> +     if (pg_sizes & H_RPTI_PAGE_2M) {
> +             psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_2M);
> +             ap = mmu_get_ap(psize);
> +             do_tlb_invalidate(rs, target, type, (1UL << 21), ap, start,
> +                               end);
> +     }
> +
> +     if (pg_sizes & H_RPTI_PAGE_1G) {
> +             psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_1G);
> +             ap = mmu_get_ap(psize);
> +             do_tlb_invalidate(rs, target, type, (1UL << 30), ap, start,
> +                               end);
> +     }
> +
> +     return H_SUCCESS;
> +}
> +
>  int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
>  {
>       unsigned long req = kvmppc_get_gpr(vcpu, 3);
> @@ -1112,6 +1224,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
>                */
>               ret = kvmppc_h_svm_init_abort(vcpu->kvm);
>               break;
> +     case H_RPT_INVALIDATE:
> +             ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4),
> +                                           kvmppc_get_gpr(vcpu, 5),
> +                                           kvmppc_get_gpr(vcpu, 6),
> +                                           kvmppc_get_gpr(vcpu, 7),
> +                                           kvmppc_get_gpr(vcpu, 8),
> +                                           kvmppc_get_gpr(vcpu, 9));
> +             break;
>  
>       default:
>               return RESUME_HOST;
> @@ -1158,6 +1278,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
>       case H_XIRR_X:
>  #endif
>       case H_PAGE_INIT:
> +     case H_RPT_INVALIDATE:
>               return 1;
>       }
>  
> diff --git a/arch/powerpc/kvm/book3s_hv_nested.c 
> b/arch/powerpc/kvm/book3s_hv_nested.c
> index 33b58549a9aa..a54ba4b1d4a7 100644
> --- a/arch/powerpc/kvm/book3s_hv_nested.c
> +++ b/arch/powerpc/kvm/book3s_hv_nested.c
> @@ -1149,6 +1149,100 @@ long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
>       return H_SUCCESS;
>  }
>  
> +static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
> +                                      unsigned long lpid,
> +                                      unsigned long page_size,
> +                                      unsigned long ap,
> +                                      unsigned long start,
> +                                      unsigned long end)
> +{
> +     unsigned long addr = start;
> +     int ret;
> +
> +     do {
> +             ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
> +                                                get_epn(addr));
> +             if (ret)
> +                     return ret;
> +             addr += page_size;
> +     } while (addr < end);
> +
> +     return ret;
> +}
> +
> +static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
> +                                      unsigned long lpid)
> +{
> +     struct kvm *kvm = vcpu->kvm;
> +     struct kvm_nested_guest *gp;
> +
> +     gp = kvmhv_get_nested(kvm, lpid, false);
> +     if (gp) {
> +             kvmhv_emulate_tlbie_lpid(vcpu, gp, RIC_FLUSH_ALL);
> +             kvmhv_put_nested(gp);
> +     }
> +     return H_SUCCESS;
> +}
> +
> +long kvmhv_h_rpti_nested(struct kvm_vcpu *vcpu, unsigned long lpid,
> +                      unsigned long type, unsigned long pg_sizes,
> +                      unsigned long start, unsigned long end)
> +{
> +     struct kvm_nested_guest *gp;
> +     long ret;
> +     unsigned long psize, ap;
> +
> +     /*
> +      * If L2 lpid isn't valid, we need to return H_PARAMETER.
> +      * Nested KVM issues a L2 lpid flush call when creating
> +      * partition table entries for L2. This happens even before
> +      * the corresponding shadow lpid is created in HV. Until
> +      * this is fixed, ignore such flush requests.
> +      */
> +     gp = kvmhv_find_nested(vcpu->kvm, lpid);
> +     if (!gp)
> +             return H_SUCCESS;
> +
> +     if ((type & H_RPTI_TYPE_NESTED_ALL) == H_RPTI_TYPE_NESTED_ALL)
> +             return do_tlb_invalidate_nested_all(vcpu, lpid);
> +
> +     if ((type & H_RPTI_TYPE_TLB) == H_RPTI_TYPE_TLB) {
> +             if (pg_sizes & H_RPTI_PAGE_64K) {
> +                     psize = rpti_pgsize_to_psize(pg_sizes & 
> H_RPTI_PAGE_64K);
> +                     ap = mmu_get_ap(psize);
> +
> +                     ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
> +                                                        (1UL << 16),
> +                                                        ap, start, end);
> +                     if (ret)
> +                             return H_P4;
> +             }
> +
> +             if (pg_sizes & H_RPTI_PAGE_2M) {
> +                     psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_2M);
> +                     ap = mmu_get_ap(psize);
> +
> +                     ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
> +                                                        (1UL << 21),
> +                                                        ap, start, end);
> +                     if (ret)
> +                             return H_P4;
> +             }
> +
> +             if (pg_sizes & H_RPTI_PAGE_1G) {
> +                     psize = rpti_pgsize_to_psize(pg_sizes & H_RPTI_PAGE_1G);
> +                     ap = mmu_get_ap(psize);
> +
> +                     ret = do_tlb_invalidate_nested_tlb(vcpu, lpid,
> +                                                        (1UL << 30),
> +                                                        ap, start, end);
> +                     if (ret)
> +                             return H_P4;
> +             }
> +     }
> +     return H_SUCCESS;
> +}
> +
>  /* Used to convert a nested guest real address to a L1 guest real address */
>  static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
>                                      struct kvm_nested_guest *gp,
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 13999123b735..172a89187116 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -678,6 +678,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
> ext)
>               r = hv_enabled && kvmppc_hv_ops->enable_svm &&
>                       !kvmppc_hv_ops->enable_svm(NULL);
>               break;
> +     case KVM_CAP_PPC_RPT_INVALIDATE:
> +             r = 1;
> +             break;
>  #endif
>       default:
>               r = 0;
> diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c 
> b/arch/powerpc/mm/book3s64/radix_tlb.c
> index b487b489d4b6..3a2b12d1d49b 100644
> --- a/arch/powerpc/mm/book3s64/radix_tlb.c
> +++ b/arch/powerpc/mm/book3s64/radix_tlb.c
> @@ -18,10 +18,6 @@
>  #include <asm/cputhreads.h>
>  #include <asm/plpar_wrappers.h>
>  
> -#define RIC_FLUSH_TLB 0
> -#define RIC_FLUSH_PWC 1
> -#define RIC_FLUSH_ALL 2
> -
>  /*
>   * tlbiel instruction for radix, set invalidation
>   * i.e., r=1 and is=01 or is=10 or is=11
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index ca41220b40b8..c9ece825299e 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1053,6 +1053,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_X86_USER_SPACE_MSR 188
>  #define KVM_CAP_X86_MSR_FILTER 189
>  #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
> +#define KVM_CAP_PPC_RPT_INVALIDATE 191
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature

Reply via email to