On 20/05/16 16:10, Julien Grall wrote: > Hi Juergen, > > On 20/05/16 14:22, Juergen Gross wrote: >> A guest mapping vcpu_runstate_info into its memory can't read this >> information from another cpu but the one the data is referring to. >> Reason is there is no reliable way for the guest to detect a concurrent >> data update by the hypervisor. >> >> This patch series adds an update flag to the mapped data which can be >> used by the guest to detect an update is occurring. As this flag is >> modifying the current interface it has to be activated by using a >> vm_assist hypercall, which in turn has to be made available for ARM. >> >> Runtime tested on x86 with a modified Linux kernel using the new >> feature. >> Compile tested only for ARM. > > I would like to give it a go on ARM. Would it be possible to provide the > patch for Linux and how to test it?
Sure. You'll need the four attached patches (to be applied on top of kernel 4.6). With CONFIG_PARAVIRT_TIME_ACCOUNTING set in the kernel config, full functionality will be used (without it being set, the runstate info of other cpus won't be read). You can verify the vm_assist hypercall has worked via "xl debug-keys q" and "xl dmesg | grep vm_assist" (value should be 00000020 on ARM, i.e. bit 5 = VMASST_TYPE_runstate_update_flag). Juergen
>From 689b4ba8c13be73ed51e485a7f7baea593d0ce6e Mon Sep 17 00:00:00 2001 From: Juergen Gross <jgr...@suse.com> Date: Tue, 17 May 2016 14:03:02 +0200 Subject: [PATCH v4] xen: add steal_clock support on x86 The pv_time_ops structure contains a function pointer for the "steal_clock" functionality used only by KVM and Xen on ARM. Xen on x86 uses its own mechanism to account for the "stolen" time a thread wasn't able to run due to hypervisor scheduling. Add support in Xen arch independent time handling for this feature by moving it out of the arm arch into drivers/xen and remove the x86 Xen hack. Signed-off-by: Juergen Gross <jgr...@suse.com> Reviewed-by: Boris Ostrovsky <boris.ostrov...@oracle.com> --- V4: minor adjustments as requested by Stefano Stabellini (remove no longer needed #include, remove __init from header) V3: add #include <asm/paravirt.h> to avoid build error on arm V2: remove the x86 do_stolen_accounting() hack --- arch/arm/xen/enlighten.c | 18 ++---------------- arch/x86/xen/time.c | 44 ++------------------------------------------ drivers/xen/time.c | 20 ++++++++++++++++++++ include/linux/kernel_stat.h | 1 - include/xen/xen-ops.h | 1 + kernel/sched/cputime.c | 10 ---------- 6 files changed, 25 insertions(+), 69 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 75cd734..71db30c 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -12,7 +12,6 @@ #include <xen/page.h> #include <xen/interface/sched.h> #include <xen/xen-ops.h> -#include <asm/paravirt.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> #include <asm/system_misc.h> @@ -84,19 +83,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range); -static unsigned long long xen_stolen_accounting(int cpu) -{ - struct vcpu_runstate_info state; - - BUG_ON(cpu != smp_processor_id()); - - xen_get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - return 
state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline]; -} - static void xen_read_wallclock(struct timespec64 *ts) { u32 version; @@ -355,8 +341,8 @@ static int __init xen_guest_init(void) register_cpu_notifier(&xen_cpu_notifier); - pv_time_ops.steal_clock = xen_stolen_accounting; - static_key_slow_inc(&paravirt_steal_enabled); + xen_time_setup_guest(); + if (xen_initial_domain()) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index a0a4e55..6be31df 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -11,8 +11,6 @@ #include <linux/interrupt.h> #include <linux/clocksource.h> #include <linux/clockchips.h> -#include <linux/kernel_stat.h> -#include <linux/math64.h> #include <linux/gfp.h> #include <linux/slab.h> #include <linux/pvclock_gtod.h> @@ -31,44 +29,6 @@ /* Xen may fire a timer up to this many ns early */ #define TIMER_SLOP 100000 -#define NS_PER_TICK (1000000000LL / HZ) - -/* snapshots of runstate info */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot); - -/* unused ns of stolen time */ -static DEFINE_PER_CPU(u64, xen_residual_stolen); - -static void do_stolen_accounting(void) -{ - struct vcpu_runstate_info state; - struct vcpu_runstate_info *snap; - s64 runnable, offline, stolen; - cputime_t ticks; - - xen_get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - snap = this_cpu_ptr(&xen_runstate_snapshot); - - /* work out how much time the VCPU has not been runn*ing* */ - runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; - offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]; - - *snap = state; - - /* Add the appropriate number of ticks of stolen time, - including any left-overs from last time. 
*/ - stolen = runnable + offline + __this_cpu_read(xen_residual_stolen); - - if (stolen < 0) - stolen = 0; - - ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); - __this_cpu_write(xen_residual_stolen, stolen); - account_steal_ticks(ticks); -} /* Get the TSC speed from Xen */ static unsigned long xen_tsc_khz(void) @@ -335,8 +295,6 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) ret = IRQ_HANDLED; } - do_stolen_accounting(); - return ret; } @@ -431,6 +389,8 @@ static void __init xen_time_init(void) xen_setup_timer(cpu); xen_setup_cpu_clockevents(); + xen_time_setup_guest(); + if (xen_initial_domain()) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); } diff --git a/drivers/xen/time.c b/drivers/xen/time.c index 7107842..2257b66 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -6,6 +6,7 @@ #include <linux/math64.h> #include <linux/gfp.h> +#include <asm/paravirt.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> @@ -75,6 +76,15 @@ bool xen_vcpu_stolen(int vcpu) return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; } +static u64 xen_steal_clock(int cpu) +{ + struct vcpu_runstate_info state; + + BUG_ON(cpu != smp_processor_id()); + xen_get_runstate_snapshot(&state); + return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline]; +} + void xen_setup_runstate_info(int cpu) { struct vcpu_register_runstate_memory_area area; @@ -86,3 +96,13 @@ void xen_setup_runstate_info(int cpu) BUG(); } +void __init xen_time_setup_guest(void) +{ + pv_time_ops.steal_clock = xen_steal_clock; + + static_key_slow_inc(&paravirt_steal_enabled); + /* + * We can't set paravirt_steal_rq_enabled as this would require the + * capability to read another cpu's runstate info. 
+ */ +} diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 25a822f..44fda64 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -92,7 +92,6 @@ static inline void account_process_tick(struct task_struct *tsk, int user) extern void account_process_tick(struct task_struct *, int user); #endif -extern void account_steal_ticks(unsigned long ticks); extern void account_idle_ticks(unsigned long ticks); #endif /* _LINUX_KERNEL_STAT_H */ diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 86abe07..77bf9d1 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -21,6 +21,7 @@ void xen_resume_notifier_unregister(struct notifier_block *nb); bool xen_vcpu_stolen(int vcpu); void xen_setup_runstate_info(int cpu); +void xen_time_setup_guest(void); void xen_get_runstate_snapshot(struct vcpu_runstate_info *res); int xen_setup_shutdown_event(void); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 75f98c5..8c4c6dc 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -490,16 +490,6 @@ void account_process_tick(struct task_struct *p, int user_tick) } /* - * Account multiple ticks of steal time. - * @p: the process from which the cpu time has been stolen - * @ticks: number of stolen ticks - */ -void account_steal_ticks(unsigned long ticks) -{ - account_steal_time(jiffies_to_cputime(ticks)); -} - -/* * Account multiple ticks of idle time. * @ticks: number of stolen ticks */ -- 2.6.6
>From 4073bb301aed18981ec69c3cf5f0df4fae567d7c Mon Sep 17 00:00:00 2001 From: Juergen Gross <jgr...@suse.com> Date: Fri, 20 May 2016 09:32:30 +0200 Subject: [PATCH 1/3] xen: update xen headers Update some Xen headers to be able to use new functionality. Signed-off-by: Juergen Gross <jgr...@suse.com> --- include/xen/interface/vcpu.h | 24 +++++++++++++++--------- include/xen/interface/xen.h | 17 ++++++++++++++++- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h index b05288c..98188c8 100644 --- a/include/xen/interface/vcpu.h +++ b/include/xen/interface/vcpu.h @@ -75,15 +75,21 @@ */ #define VCPUOP_get_runstate_info 4 struct vcpu_runstate_info { - /* VCPU's current state (RUNSTATE_*). */ - int state; - /* When was current state entered (system time, ns)? */ - uint64_t state_entry_time; - /* - * Time spent in each RUNSTATE_* (ns). The sum of these times is - * guaranteed not to drift from system time. - */ - uint64_t time[4]; + /* VCPU's current state (RUNSTATE_*). */ + int state; + /* When was current state entered (system time, ns)? */ + uint64_t state_entry_time; + /* + * Update indicator set in state_entry_time: + * When activated via VMASST_TYPE_runstate_update_flag, set during + * updates in guest memory mapped copy of vcpu_runstate_info. + */ +#define XEN_RUNSTATE_UPDATE (1ULL << 63) + /* + * Time spent in each RUNSTATE_* (ns). The sum of these times is + * guaranteed not to drift from system time. + */ + uint64_t time[4]; }; DEFINE_GUEST_HANDLE_STRUCT(vcpu_runstate_info); diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index d133112..1b0d189 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -413,7 +413,22 @@ DEFINE_GUEST_HANDLE_STRUCT(mmuext_op); /* x86/PAE guests: support PDPTs above 4GB. 
*/ #define VMASST_TYPE_pae_extended_cr3 3 -#define MAX_VMASST_TYPE 3 +/* + * x86 guests: Sane behaviour for virtual iopl + * - virtual iopl updated from do_iret() hypercalls. + * - virtual iopl reported in bounce frames. + * - guest kernels assumed to be level 0 for the purpose of iopl checks. + */ +#define VMASST_TYPE_architectural_iopl 4 + +/* + * All guests: activate update indicator in vcpu_runstate_info + * Enable setting the XEN_RUNSTATE_UPDATE flag in guest memory mapped + * vcpu_runstate_info during updates of the runstate information. + */ +#define VMASST_TYPE_runstate_update_flag 5 + +#define MAX_VMASST_TYPE 5 #ifndef __ASSEMBLY__ -- 2.6.6
>From ab457b88c03a66c6051ac022b51bc5c218f48842 Mon Sep 17 00:00:00 2001 From: Juergen Gross <jgr...@suse.com> Date: Fri, 20 May 2016 12:08:21 +0200 Subject: [PATCH 2/3] arm/xen: add support for vm_assist hypercall Add support for the Xen HYPERVISOR_vm_assist hypercall. Signed-off-by: Juergen Gross <jgr...@suse.com> --- arch/arm/include/asm/xen/hypercall.h | 1 + arch/arm/xen/enlighten.c | 1 + arch/arm/xen/hypercall.S | 1 + arch/arm64/xen/hypercall.S | 1 + 4 files changed, 4 insertions(+) diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h index b6b962d..9d874db 100644 --- a/arch/arm/include/asm/xen/hypercall.h +++ b/arch/arm/include/asm/xen/hypercall.h @@ -52,6 +52,7 @@ int HYPERVISOR_memory_op(unsigned int cmd, void *arg); int HYPERVISOR_physdev_op(int cmd, void *arg); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); int HYPERVISOR_tmem_op(void *arg); +int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type); int HYPERVISOR_platform_op_raw(void *arg); static inline int HYPERVISOR_platform_op(struct xen_platform_op *op) { diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 71db30c..0f3aa12 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -389,4 +389,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op); EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op); EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); +EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist); EXPORT_SYMBOL_GPL(privcmd_call); diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index 9a36f4f..a648dfc 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -91,6 +91,7 @@ HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); +HYPERCALL2(vm_assist); ENTRY(privcmd_call) stmdb sp!, {r4} diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 70df80e..329c802 100644 --- a/arch/arm64/xen/hypercall.S +++ 
b/arch/arm64/xen/hypercall.S @@ -82,6 +82,7 @@ HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); +HYPERCALL2(vm_assist); ENTRY(privcmd_call) mov x16, x0 -- 2.6.6
>From f27da1aba6c9c92add4f88b4dcec517e5e321caa Mon Sep 17 00:00:00 2001 From: Juergen Gross <jgr...@suse.com> Date: Fri, 20 May 2016 12:25:58 +0200 Subject: [PATCH 3/3] xen: support runqueue steal time on xen Up to now reading the stolen time of a remote cpu was not possible in a performant way under Xen. This made support of runqueue steal time via paravirt_steal_rq_enabled impossible. With the addition of an appropriate hypervisor interface this is now possible, so add the support. Signed-off-by: Juergen Gross <jgr...@suse.com> --- drivers/xen/time.c | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/drivers/xen/time.c b/drivers/xen/time.c index 2257b66..04b6cb7 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -19,6 +19,9 @@ /* runstate info updated by Xen */ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate); +/* runstate info of remote cpu accessible */ +static bool xen_runstate_remote; + /* return an consistent snapshot of 64-bit time/counter value */ static u64 get64(const u64 *p) { @@ -47,27 +50,31 @@ static u64 get64(const u64 *p) return ret; } -/* - * Runstate accounting - */ -void xen_get_runstate_snapshot(struct vcpu_runstate_info *res) +static void xen_get_runstate_snapshot_cpu(struct vcpu_runstate_info *res, + unsigned cpu) { u64 state_time; struct vcpu_runstate_info *state; BUG_ON(preemptible()); - state = this_cpu_ptr(&xen_runstate); + state = per_cpu_ptr(&xen_runstate, cpu); - /* - * The runstate info is always updated by the hypervisor on - * the current CPU, so there's no need to use anything - * stronger than a compiler barrier when fetching it. 
- */ do { state_time = get64(&state->state_entry_time); + rmb(); *res = READ_ONCE(*state); - } while (get64(&state->state_entry_time) != state_time); + rmb(); + } while (get64(&state->state_entry_time) != state_time || + (state_time & XEN_RUNSTATE_UPDATE)); +} + +/* + * Runstate accounting + */ +void xen_get_runstate_snapshot(struct vcpu_runstate_info *res) +{ + xen_get_runstate_snapshot_cpu(res, smp_processor_id()); } /* return true when a vcpu could run but has no real cpu to run on */ @@ -80,8 +87,8 @@ static u64 xen_steal_clock(int cpu) { struct vcpu_runstate_info state; - BUG_ON(cpu != smp_processor_id()); - xen_get_runstate_snapshot(&state); + BUG_ON(!xen_runstate_remote && cpu != smp_processor_id()); + xen_get_runstate_snapshot_cpu(&state, cpu); return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline]; } @@ -98,11 +105,12 @@ void xen_setup_runstate_info(int cpu) void __init xen_time_setup_guest(void) { + xen_runstate_remote = !HYPERVISOR_vm_assist(VMASST_CMD_enable, + VMASST_TYPE_runstate_update_flag); + pv_time_ops.steal_clock = xen_steal_clock; static_key_slow_inc(&paravirt_steal_enabled); - /* - * We can't set paravirt_steal_rq_enabled as this would require the - * capability to read another cpu's runstate info. - */ + if (xen_runstate_remote) + static_key_slow_inc(&paravirt_steal_rq_enabled); } -- 2.6.6
_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel