Re: [PATCH v9 12/16] ARM: KVM: World-switch implementation

Avi Kivity Tue, 03 Jul 2012 03:08:24 -0700

On 07/03/2012 12:01 PM, Christoffer Dall wrote:
> Provides complete world-switch implementation to switch to other guests
> running in non-secure modes. Includes Hyp exception handlers that
> capture necessary exception information and stores the information on
> the VCPU and KVM structures.
> 
> The following Hyp-ABI is also documented in the code:
> 
> Hyp-ABI: Switching from host kernel to Hyp-mode:
>    Switching to Hyp mode is done through a simple HVC instruction. The
>    exception vector code will check that the HVC comes from VMID==0 and if
>    so will store the necessary state on the Hyp stack, which will look like
>    this (growing downwards, see the hyp_hvc handler):
>      ...
>      stack_page + 4: spsr (Host-SVC cpsr)
>      stack_page    : lr_usr
>      --------------: stack bottom
> 
> Hyp-ABI: Switching from Hyp-mode to host kernel SVC mode:
>    When returning from Hyp mode to SVC mode, another HVC instruction is
>    executed from Hyp mode, which is taken in the hyp_svc handler. The
>    bottom of the Hyp stack is derived from the Hyp stack pointer (only a single
>    page aligned stack is used per CPU) and the initial SVC registers are
>    used to restore the host state.
> 
> Otherwise, the world-switch is pretty straight-forward. All state that
> can be modified by the guest is first backed up on the Hyp stack and the
> VCPU values are loaded onto the hardware. State, which is not loaded, but
> theoretically modifiable by the guest is protected through the
> virtualization features to generate a trap and cause software emulation.
> Upon guest returns, all state is restored from hardware onto the VCPU
> struct and the original state is restored from the Hyp-stack onto the
> hardware.
> 
> One controversy may be the back-door call to __irq_svc (the host
> kernel's own physical IRQ handler) which is called when a physical IRQ
> exception is taken in Hyp mode while running in the guest.
> 
> SMP support using the VMPIDR calculated on the basis of the host MPIDR
> and overriding the low bits with KVM vcpu_id contributed by Marc Zyngier.


He should sign off on this patch then.

> 
> Reuse of VMIDs has been implemented by Antonios Motakis and adapted from
> a separate patch into the appropriate patches introducing the
> functionality. Note that the VMIDs are stored per VM as required by the ARM
> architecture reference manual.

Ditto.

> diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
> index 220f241..232117c 100644
> --- a/arch/arm/include/asm/kvm_arm.h
> +++ b/arch/arm/include/asm/kvm_arm.h
> @@ -105,6 +105,17 @@
>  #define TTBCR_T0SZ   3
>  #define HTCR_MASK    (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
>  
> +/* Hyp System Trap Register */
> +#define HSTR_T(x)    (1 << x)
> +#define HSTR_TTEE    (1 << 16)
> +#define HSTR_TJDBX   (1 << 17)
> +
> +/* Hyp Coprocessor Trap Register */
> +#define HCPTR_TCP(x) (1 << x)
> +#define HCPTR_TCP_MASK       (0x3fff)
> +#define HCPTR_TASE   (1 << 15)
> +#define HCPTR_TTA    (1 << 20)
> +#define HCPTR_TCPAC  (1 << 31)
>  
>  /* Virtualization Translation Control Register (VTCR) bits */
>  #define VTCR_SH0     (3 << 12)
> @@ -126,5 +137,31 @@
>  #define VTTBR_X              (5 - VTCR_GUEST_T0SZ)
>  #endif
>  
> +/* Hyp Syndrome Register (HSR) bits */
> +#define HSR_EC_SHIFT (26)
> +#define HSR_EC               (0x3fU << HSR_EC_SHIFT)
> +#define HSR_IL               (1U << 25)
> +#define HSR_ISS              (HSR_IL - 1)
> +#define HSR_ISV_SHIFT        (24)
> +#define HSR_ISV              (1U << HSR_ISV_SHIFT)
> +
> +#define HSR_EC_UNKNOWN       (0x00)
> +#define HSR_EC_WFI   (0x01)
> +#define HSR_EC_CP15_32       (0x03)
> +#define HSR_EC_CP15_64       (0x04)
> +#define HSR_EC_CP14_MR       (0x05)
> +#define HSR_EC_CP14_LS       (0x06)
> +#define HSR_EC_CP_0_13       (0x07)
> +#define HSR_EC_CP10_ID       (0x08)
> +#define HSR_EC_JAZELLE       (0x09)
> +#define HSR_EC_BXJ   (0x0A)
> +#define HSR_EC_CP14_64       (0x0C)
> +#define HSR_EC_SVC_HYP       (0x11)
> +#define HSR_EC_HVC   (0x12)
> +#define HSR_EC_SMC   (0x13)
> +#define HSR_EC_IABT  (0x20)
> +#define HSR_EC_IABT_HYP      (0x21)
> +#define HSR_EC_DABT  (0x24)
> +#define HSR_EC_DABT_HYP      (0x25)
>  
>  #endif /* __ARM_KVM_ARM_H__ */
> diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
> index b57c75e..38d3a12 100644
> --- a/arch/arm/kernel/armksyms.c
> +++ b/arch/arm/kernel/armksyms.c
> @@ -48,6 +48,13 @@ extern void __aeabi_ulcmp(void);
>  
>  extern void fpundefinstr(void);
>  
> +#ifdef CONFIG_KVM_ARM_HOST
> +/* This is needed for KVM */
> +extern void __irq_svc(void);
> +
> +EXPORT_SYMBOL_GPL(__irq_svc);
> +#endif
> +
>       /* platform dependent support */
>  EXPORT_SYMBOL(__udelay);
>  EXPORT_SYMBOL(__const_udelay);
> diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
> index 1429d89..9c76b53 100644
> --- a/arch/arm/kernel/asm-offsets.c
> +++ b/arch/arm/kernel/asm-offsets.c
> @@ -13,6 +13,7 @@
>  #include <linux/sched.h>
>  #include <linux/mm.h>
>  #include <linux/dma-mapping.h>
> +#include <linux/kvm_host.h>
>  #include <asm/cacheflush.h>
>  #include <asm/glue-df.h>
>  #include <asm/glue-pf.h>
> @@ -144,5 +145,47 @@ int main(void)
>    DEFINE(DMA_BIDIRECTIONAL,  DMA_BIDIRECTIONAL);
>    DEFINE(DMA_TO_DEVICE,              DMA_TO_DEVICE);
>    DEFINE(DMA_FROM_DEVICE,    DMA_FROM_DEVICE);
> +#ifdef CONFIG_KVM_ARM_HOST
> +  DEFINE(VCPU_KVM,           offsetof(struct kvm_vcpu, kvm));
> +  DEFINE(VCPU_MIDR,          offsetof(struct kvm_vcpu, arch.cp15[c0_MIDR]));
> +  DEFINE(VCPU_MPIDR,         offsetof(struct kvm_vcpu, arch.cp15[c0_MPIDR]));
> +  DEFINE(VCPU_SCTLR,         offsetof(struct kvm_vcpu, arch.cp15[c1_SCTLR]));
> +  DEFINE(VCPU_CPACR,         offsetof(struct kvm_vcpu, arch.cp15[c1_CPACR]));
> +  DEFINE(VCPU_TTBR0,         offsetof(struct kvm_vcpu, arch.cp15[c2_TTBR0]));
> +  DEFINE(VCPU_TTBR1,         offsetof(struct kvm_vcpu, arch.cp15[c2_TTBR1]));
> +  DEFINE(VCPU_TTBCR,         offsetof(struct kvm_vcpu, arch.cp15[c2_TTBCR]));
> +  DEFINE(VCPU_DACR,          offsetof(struct kvm_vcpu, arch.cp15[c3_DACR]));
> +  DEFINE(VCPU_DFSR,          offsetof(struct kvm_vcpu, arch.cp15[c5_DFSR]));
> +  DEFINE(VCPU_IFSR,          offsetof(struct kvm_vcpu, arch.cp15[c5_IFSR]));
> +  DEFINE(VCPU_ADFSR,         offsetof(struct kvm_vcpu, arch.cp15[c5_ADFSR]));
> +  DEFINE(VCPU_AIFSR,         offsetof(struct kvm_vcpu, arch.cp15[c5_AIFSR]));
> +  DEFINE(VCPU_DFAR,          offsetof(struct kvm_vcpu, arch.cp15[c6_DFAR]));
> +  DEFINE(VCPU_IFAR,          offsetof(struct kvm_vcpu, arch.cp15[c6_IFAR]));
> +  DEFINE(VCPU_PRRR,          offsetof(struct kvm_vcpu, arch.cp15[c10_PRRR]));
> +  DEFINE(VCPU_NMRR,          offsetof(struct kvm_vcpu, arch.cp15[c10_NMRR]));
> +  DEFINE(VCPU_VBAR,          offsetof(struct kvm_vcpu, arch.cp15[c12_VBAR]));
> +  DEFINE(VCPU_CID,           offsetof(struct kvm_vcpu, arch.cp15[c13_CID]));
> +  DEFINE(VCPU_TID_URW,               offsetof(struct kvm_vcpu, 
> arch.cp15[c13_TID_URW]));
> +  DEFINE(VCPU_TID_URO,               offsetof(struct kvm_vcpu, 
> arch.cp15[c13_TID_URO]));
> +  DEFINE(VCPU_TID_PRIV,              offsetof(struct kvm_vcpu, 
> arch.cp15[c13_TID_PRIV]));
> +  DEFINE(VCPU_REGS,          offsetof(struct kvm_vcpu, arch.regs));
> +  DEFINE(VCPU_USR_REGS,              offsetof(struct kvm_vcpu, 
> arch.regs.usr_regs));
> +  DEFINE(VCPU_SVC_REGS,              offsetof(struct kvm_vcpu, 
> arch.regs.svc_regs));
> +  DEFINE(VCPU_ABT_REGS,              offsetof(struct kvm_vcpu, 
> arch.regs.abt_regs));
> +  DEFINE(VCPU_UND_REGS,              offsetof(struct kvm_vcpu, 
> arch.regs.und_regs));
> +  DEFINE(VCPU_IRQ_REGS,              offsetof(struct kvm_vcpu, 
> arch.regs.irq_regs));
> +  DEFINE(VCPU_FIQ_REGS,              offsetof(struct kvm_vcpu, 
> arch.regs.fiq_regs));
> +  DEFINE(VCPU_PC,            offsetof(struct kvm_vcpu, arch.regs.pc));
> +  DEFINE(VCPU_CPSR,          offsetof(struct kvm_vcpu, arch.regs.cpsr));
> +  DEFINE(VCPU_IRQ_LINES,     offsetof(struct kvm_vcpu, arch.irq_lines));
> +  DEFINE(VCPU_HSR,           offsetof(struct kvm_vcpu, arch.hsr));
> +  DEFINE(VCPU_HDFAR,         offsetof(struct kvm_vcpu, arch.hdfar));
> +  DEFINE(VCPU_HIFAR,         offsetof(struct kvm_vcpu, arch.hifar));
> +  DEFINE(VCPU_HPFAR,         offsetof(struct kvm_vcpu, arch.hpfar));
> +  DEFINE(VCPU_PC_IPA,                offsetof(struct kvm_vcpu, arch.pc_ipa));
> +  DEFINE(VCPU_PC_IPA2,               offsetof(struct kvm_vcpu, 
> arch.pc_ipa2));
> +  DEFINE(VCPU_HYP_PC,                offsetof(struct kvm_vcpu, arch.hyp_pc));
> +  DEFINE(KVM_VTTBR,          offsetof(struct kvm, arch.vttbr));
> +#endif
>    return 0; 
>  }
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 437f0c4..db029bb 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -209,6 +209,7 @@ __dabt_svc:
>  ENDPROC(__dabt_svc)
>  
>       .align  5
> +     .globl __irq_svc
>  __irq_svc:
>       svc_entry
>       irq_handler
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 8b024ee..4687690 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -37,12 +37,19 @@
>  #include <asm/mman.h>
>  #include <asm/idmap.h>
>  #include <asm/tlbflush.h>
> +#include <asm/cputype.h>
>  #include <asm/kvm_arm.h>
>  #include <asm/kvm_asm.h>
>  #include <asm/kvm_mmu.h>
> +#include <asm/kvm_emulate.h>
>  
>  static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
>  
> +/* The VMID used in the VTTBR */
> +static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
> +static u8 kvm_next_vmid;
> +DEFINE_SPINLOCK(kvm_vmid_lock);

This should be static, too.

> +
> +
> +/**
> + * check_new_vmid_gen - check that the VMID is still valid
> + * @kvm: The VM's VMID to check
> + *
> + * return true if there is a new generation of VMIDs being used
> + *
> + * The hardware supports only 256 values with the value zero reserved for the
> + * host, so we check if an assigned value belongs to a previous generation,
> + * which requires us to assign a new value. If we're the first to use a
> + * VMID for the new generation, we must flush necessary caches and TLBs on 
> all
> + * CPUs.
> + */
> +static bool check_new_vmid_gen(struct kvm *kvm)
> +{
> +     return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
> +}

Better have the name indicate what a true return value means, like
'need_new_vmid_gen()'.

> +
> +/**
> + * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
> + * @kvm      The guest that we are about to run
> + *
> + * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure 
> the
> + * VM has a valid VMID, otherwise assigns a new one and flushes corresponding
> + * caches and TLBs.
> + */
> +static void update_vttbr(struct kvm *kvm)
> +{
> +     phys_addr_t pgd_phys;
> +
> +     if (!check_new_vmid_gen(kvm))
> +             return;
> +
> +     spin_lock(&kvm_vmid_lock);
> +
> +     /* First user of a new VMID generation? */
> +     if (unlikely(kvm_next_vmid == 0)) {
> +             atomic64_inc(&kvm_vmid_gen);
> +             kvm_next_vmid = 1;
> +
> +             /* This does nothing on UP */
> +             smp_call_function(reset_vm_context, NULL, 1);
> +
> +             /*
> +              * On SMP we know no other CPUs can use this CPU's or
> +              * each other's VMID since the kvm_vmid_lock blocks
> +              * them from reentry to the guest.
> +              */
> +
> +             reset_vm_context(NULL);

on_each_cpu() will combine the two lines above.

> +     }
> +
> +     kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
> +     kvm->arch.vmid = kvm_next_vmid;
> +     kvm_next_vmid++;
> +
> +     /* update vttbr to be used with the new vmid */
> +     pgd_phys = virt_to_phys(kvm->arch.pgd);
> +     kvm->arch.vttbr = pgd_phys & ((1LLU << 40) - 1)
> +                       & ~((2 << VTTBR_X) - 1);
> +     kvm->arch.vttbr |= (u64)(kvm->arch.vmid) << 48;
> +
> +     spin_unlock(&kvm_vmid_lock);
> +}
> +
> +/*
> + * Return 0 to return to guest, < 0 on error, exit_reason ( > 0) on proper
> + * exit to QEMU.
> + */
> +static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
> +                    int exception_index)
> +{
> +     return -EINVAL;

x86 returns KVM_EXIT_INTERNAL_ERROR when it encounters an unhandlable
exit.  -EINVAL indicates that the user has done something wrong, which
isn't the case here.

> +}
> +
> +/*
> + * Return 0 to proceed with guest entry
> + */
> +static int vcpu_pre_guest_enter(struct kvm_vcpu *vcpu, int *exit_reason)
> +{
> +     if (signal_pending(current)) {
> +             *exit_reason = KVM_EXIT_INTR;
> +             return -EINTR;
> +     }
> +
> +     if (check_new_vmid_gen(vcpu->kvm))
> +             return 1;
> +
> +     BUG_ON(__vcpu_mode(*vcpu_cpsr(vcpu)) == 0xf);
> +
>       return 0;
>  }
>  
> +/**
> + * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
> + * @vcpu:    The VCPU pointer
> + * @run:     The kvm_run structure pointer used for userspace state exchange
> + *
> + * This function is called through the VCPU_RUN ioctl called from user 
> space. It
> + * will execute VM code in a loop until the time slice for the process is 
> used
> + * or some emulation is needed from user space in which case the function 
> will
> + * return with return value 0 and with the kvm_run structure filled in with 
> the
> + * required data for the requested emulation.
> + */
>  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  {
> -     return -EINVAL;
> +     int ret = 0;
> +     int exit_reason;
> +     sigset_t sigsaved;
> +
> +     if (vcpu->sigset_active)
> +             sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
> +

We should move this to common code.  But I don't mind if this is done
post merge.

> +     exit_reason = KVM_EXIT_UNKNOWN;
> +     while (exit_reason == KVM_EXIT_UNKNOWN) {

Looping over 'ret' is more in line with x86 and clearer IMO.  x86 uses
the convention: < 0 -> return to userspace with error, 0 -> return to
userspace, 1 -> loop.

> +             /*
> +              * Check conditions before entering the guest
> +              */
> +             cond_resched();
> +
> +             update_vttbr(vcpu->kvm);
> +
> +             local_irq_disable();
> +
> +             /* Re-check atomic conditions */
> +             ret = vcpu_pre_guest_enter(vcpu, &exit_reason);
> +             if (ret != 0) {
> +                     local_irq_enable();
> +                     preempt_enable();
> +                     continue;

See - you continue, only to break out of the loop due to a side effect
on exit_reason.

> +             }
> +
> +             /**************************************************************
> +              * Enter the guest
> +              */
> +             trace_kvm_entry(vcpu->arch.regs.pc);
> +             kvm_guest_enter();
> +             vcpu->mode = IN_GUEST_MODE;
> +
> +             ret = __kvm_vcpu_run(vcpu);
> +
> +             vcpu->mode = OUTSIDE_GUEST_MODE;
> +             vcpu->stat.exits++;

The tracepoint above should be sufficient for statistics.

> +             kvm_guest_exit();
> +             trace_kvm_exit(vcpu->arch.regs.pc);
> +             local_irq_enable();
> +
> +             /*
> +              * Back from guest
> +              *************************************************************/
> +
> +             ret = handle_exit(vcpu, run, ret);
> +             if (ret < 0) {
> +                     kvm_err("Error in handle_exit\n");
> +                     break;
> +             } else {
> +                     exit_reason = ret; /* 0 == KVM_EXIT_UNKNOWN */
> +             }
> +     }
> +
> +     if (vcpu->sigset_active)
> +             sigprocmask(SIG_SETMASK, &sigsaved, NULL);
> +
> +     run->exit_reason = exit_reason;
> +     return ret;
>  }
>  

-- 
error compiling committee.c: too many arguments to function


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v9 12/16] ARM: KVM: World-switch implementation

Reply via email to