On 20.12.2011, at 11:22, Paul Mackerras wrote:

> The PAPR API allows three sorts of per-virtual-processor areas to be
> registered (VPA, SLB shadow buffer, and dispatch trace log), and
> furthermore, these can be registered and unregistered for another
> virtual CPU.  Currently we just update the vcpu fields pointing to
> these areas at the time of registration or unregistration.  If this
> is done on another vcpu, there is the possibility that the target vcpu
> is using those fields at the time and could end up using a bogus
> pointer and corrupting memory.
> 
> This fixes the race by making the target cpu itself do the update, so
> we can be sure that the update happens at a time when the fields aren't
> being used.  These are updated from a set of 'next_*' fields, which
> are protected by a spinlock.  (We could have just taken the spinlock
> when using the vpa, slb_shadow or dtl fields, but that would mean
> taking the spinlock on every guest entry and exit.)
> 
> The code in do_h_register_vpa now takes the spinlock and updates the
> 'next_*' fields.  There is also a set of '*_pending' flags to indicate
> that an update is pending.
> 
> This also changes 'struct dtl' (which was undefined) to 'struct dtl_entry',
> which is what the rest of the kernel uses.
> 
> Signed-off-by: Paul Mackerras <pau...@samba.org>
> ---
> arch/powerpc/include/asm/kvm_host.h |   15 +++-
> arch/powerpc/kvm/book3s_hv.c        |  167 +++++++++++++++++++++++++----------
> 2 files changed, 131 insertions(+), 51 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h 
> b/arch/powerpc/include/asm/kvm_host.h
> index 1cb6e52..b1126c1 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -82,7 +82,7 @@ struct kvm_vcpu;
> 
> struct lppaca;
> struct slb_shadow;
> -struct dtl;
> +struct dtl_entry;
> 
> struct kvm_vm_stat {
>       u32 remote_tlb_flush;
> @@ -449,9 +449,18 @@ struct kvm_vcpu_arch {
>       u32 last_inst;
> 
>       struct lppaca *vpa;
> +     struct lppaca *next_vpa;
>       struct slb_shadow *slb_shadow;
> -     struct dtl *dtl;
> -     struct dtl *dtl_end;
> +     struct slb_shadow *next_slb_shadow;
> +     struct dtl_entry *dtl;
> +     struct dtl_entry *dtl_end;
> +     struct dtl_entry *dtl_ptr;
> +     struct dtl_entry *next_dtl;
> +     struct dtl_entry *next_dtl_end;
> +     u8 vpa_pending;
> +     u8 slb_shadow_pending;
> +     u8 dtl_pending;
> +     spinlock_t vpa_update_lock;
> 
>       wait_queue_head_t *wqp;
>       struct kvmppc_vcore *vcore;
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index c11d960..6f6e88d 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -140,7 +140,7 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu 
> *vcpu,
> {
>       struct kvm *kvm = vcpu->kvm;
>       unsigned long len, nb;
> -     void *va;
> +     void *va, *free_va, *tvpa, *dtl, *ss;
>       struct kvm_vcpu *tvcpu;
>       int err = H_PARAMETER;
> 
> @@ -152,6 +152,8 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu 
> *vcpu,
>       flags &= 7;
>       if (flags == 0 || flags == 4)

This could probably use a new variable name. Also, what do 0 and 4 mean? 
Constant defines would be nice here.

>               return H_PARAMETER;
> +     free_va = va = NULL;
> +     len = 0;
>       if (flags < 4) {
>               if (vpa & 0x7f)
>                       return H_PARAMETER;
> @@ -165,65 +167,122 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu 
> *vcpu,

[pasted from real source]
>                 va = kvmppc_pin_guest_page(kvm, vpa, &nb);

Here you're pinning the page, setting va to that temporarily available address.

[...]

>                       len = *(unsigned short *)(va + 4);

This is a condition on (flags <= 1). We bail out on flags == 0 a few lines up. 
Just move this whole thing into the respective function handlers.

>               else
>                       len = *(unsigned int *)(va + 4);

va + 4 isn't really descriptive. Is this a defined struct? Why not actually 
define one which you can just read data from? Or at least make this a define 
too. Reading random numbers in code is barely readable.

> +             free_va = va;

Now free_va is the temporarily available address.

>               if (len > nb)
>                       goto out_unpin;
> -             switch (flags) {
> -             case 1:         /* register VPA */
> -                     if (len < 640)
> -                             goto out_unpin;
> -                     if (tvcpu->arch.vpa)
> -                             kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa);
> -                     tvcpu->arch.vpa = va;
> -                     init_vpa(vcpu, va);
> -                     break;
> -             case 2:         /* register DTL */
> -                     if (len < 48)
> -                             goto out_unpin;
> -                     len -= len % 48;
> -                     if (tvcpu->arch.dtl)
> -                             kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl);
> -                     tvcpu->arch.dtl = va;
> -                     tvcpu->arch.dtl_end = va + len;
> +     }
> +
> +     spin_lock(&tvcpu->arch.vpa_update_lock);
> +
> +     switch (flags) {
> +     case 1:         /* register VPA */

Yeah, these could also use defines :)


> +             if (len < 640)
>                       break;
> -             case 3:         /* register SLB shadow buffer */
> -                     if (len < 16)
> -                             goto out_unpin;
> -                     if (tvcpu->arch.slb_shadow)
> -                             kvmppc_unpin_guest_page(kvm, 
> vcpu->arch.slb_shadow);
> -                     tvcpu->arch.slb_shadow = va;
> +             free_va = tvcpu->arch.next_vpa;
> +             tvcpu->arch.next_vpa = va;

Now you're setting next_vpa to this temporarily available address? But next_vpa 
will be used after va is getting free'd, no? Or is that why you have free_va?

Wouldn't it be easier to just map it every time we actually use it and only 
shove the GPA around? We could basically save ourselves a lot of the logic here.


Alex

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to