On 18/05/16 18:09, Tony S wrote: > On Wed, May 18, 2016 at 8:57 AM, Dario Faggioli > <dario.faggi...@citrix.com> wrote: >> On Wed, 2016-05-18 at 14:24 +0200, Juergen Gross wrote: >>> On 17/05/16 11:33, George Dunlap wrote: >>>>> Looks like CONFIG_PARAVIRT_TIME_ACCOUNTING is used for adjusting >>>>> process >>>>> times. KVM uses it but Xen doesn't. >>>> Is someone on the Linux side going to put this on their to-do list >>>> then? :-) >>> >>> Patch sent. >>> >> Yep, seen it, thanks. >> >>> Support was already existing for arm. >>> >> Yes!! I remember Stefano talking about introducing it, and that was >> also why I thought we had it already since long time on x86. >> >> Well, anyway... :-) >> >>> What is missing is support for >>> paravirt_steal_rq_enabled which requires to be able to read the >>> stolen >>> time of another cpu. This can't work today as accessing another cpu's >>> vcpu_runstate_info isn't possible without risking inconsistent data. >>> I plan to add support for this, too, but this will require adding >>> another hypercall to map a modified vcpu_runstate_info containing an >>> indicator for an ongoing update of the data. >>> >> Understood. >> >> So, Tony, up for trying again your workload with this patch applied to >> Linux? >> >> Most likely, it _won't_ fix all the problems you're seeing, but I'm >> curious to see if it helps. > > Hi Dario, > > I did not see the patch. Can you please send me the patch and I will > try to test it later.
Here is an updated version. Juergen
>From d4a6e2217adfa8715237738b67a8989528d59cae Mon Sep 17 00:00:00 2001 From: Juergen Gross <jgr...@suse.com> Date: Tue, 17 May 2016 14:03:02 +0200 Subject: [PATCH v2] xen: add steal_clock support on x86 With CONFIG_PARAVIRT_TIME_ACCOUNTING the kernel is capable to account for time a thread wasn't able to run due to hypervisor scheduling. Add support in Xen arch independent time handling for this feature by moving it out of the arm arch into drivers/xen. Signed-off-by: Juergen Gross <jgr...@suse.com> --- arch/arm/xen/enlighten.c | 17 ++--------------- arch/x86/Kconfig | 2 +- arch/x86/xen/time.c | 44 ++------------------------------------------ drivers/xen/time.c | 19 +++++++++++++++++++ include/linux/kernel_stat.h | 1 - include/xen/xen-ops.h | 1 + kernel/sched/cputime.c | 10 ---------- 7 files changed, 25 insertions(+), 69 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 75cd734..9163b94 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -84,19 +84,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range); -static unsigned long long xen_stolen_accounting(int cpu) -{ - struct vcpu_runstate_info state; - - BUG_ON(cpu != smp_processor_id()); - - xen_get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline]; -} - static void xen_read_wallclock(struct timespec64 *ts) { u32 version; @@ -355,8 +342,8 @@ static int __init xen_guest_init(void) register_cpu_notifier(&xen_cpu_notifier); - pv_time_ops.steal_clock = xen_stolen_accounting; - static_key_slow_inc(¶virt_steal_enabled); + xen_time_setup_guest(); + if (xen_initial_domain()) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7bb1574..3be1fee 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -752,7 +752,7 @@ source "arch/x86/lguest/Kconfig" config PARAVIRT_TIME_ACCOUNTING bool "Paravirtual steal time accounting" depends on PARAVIRT - default n + default y if XEN ---help--- Select this option to enable fine granularity task steal time accounting. Time spent executing other tasks in parallel with diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index a0a4e55..6be31df 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -11,8 +11,6 @@ #include <linux/interrupt.h> #include <linux/clocksource.h> #include <linux/clockchips.h> -#include <linux/kernel_stat.h> -#include <linux/math64.h> #include <linux/gfp.h> #include <linux/slab.h> #include <linux/pvclock_gtod.h> @@ -31,44 +29,6 @@ /* Xen may fire a timer up to this many ns early */ #define TIMER_SLOP 100000 -#define NS_PER_TICK (1000000000LL / HZ) - -/* snapshots of runstate info */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot); - -/* unused ns of stolen time */ -static DEFINE_PER_CPU(u64, xen_residual_stolen); - -static void do_stolen_accounting(void) -{ - struct vcpu_runstate_info state; - struct vcpu_runstate_info *snap; - s64 runnable, offline, stolen; - cputime_t ticks; - - xen_get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - snap = this_cpu_ptr(&xen_runstate_snapshot); - - /* work out how much time the VCPU has not been runn*ing* */ - runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; - offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]; - - *snap = state; - - /* Add the appropriate number of ticks of stolen time, - including any left-overs from last time. */ - stolen = runnable + offline + __this_cpu_read(xen_residual_stolen); - - if (stolen < 0) - stolen = 0; - - ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); - __this_cpu_write(xen_residual_stolen, stolen); - account_steal_ticks(ticks); -} /* Get the TSC speed from Xen */ static unsigned long xen_tsc_khz(void) @@ -335,8 +295,6 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) ret = IRQ_HANDLED; } - do_stolen_accounting(); - return ret; } @@ -431,6 +389,8 @@ static void __init xen_time_init(void) xen_setup_timer(cpu); xen_setup_cpu_clockevents(); + xen_time_setup_guest(); + if (xen_initial_domain()) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); } diff --git a/drivers/xen/time.c b/drivers/xen/time.c index 7107842..6648a78 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -75,6 +75,15 @@ bool xen_vcpu_stolen(int vcpu) return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; } +static u64 xen_steal_clock(int cpu) +{ + struct vcpu_runstate_info state; + + BUG_ON(cpu != smp_processor_id()); + xen_get_runstate_snapshot(&state); + return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline]; +} + void xen_setup_runstate_info(int cpu) { struct vcpu_register_runstate_memory_area area; @@ -86,3 +95,13 @@ void xen_setup_runstate_info(int cpu) BUG(); } +void __init xen_time_setup_guest(void) +{ + pv_time_ops.steal_clock = xen_steal_clock; + + static_key_slow_inc(¶virt_steal_enabled); + /* + * We can't set paravirt_steal_rq_enabled as this would require the + * capability to read another cpu's runstate info. + */ +} diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 25a822f..44fda64 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -92,7 +92,6 @@ static inline void account_process_tick(struct task_struct *tsk, int user) extern void account_process_tick(struct task_struct *, int user); #endif -extern void account_steal_ticks(unsigned long ticks); extern void account_idle_ticks(unsigned long ticks); #endif /* _LINUX_KERNEL_STAT_H */ diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 86abe07..5ce51c2 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -21,6 +21,7 @@ void xen_resume_notifier_unregister(struct notifier_block *nb); bool xen_vcpu_stolen(int vcpu); void xen_setup_runstate_info(int cpu); +void __init xen_time_setup_guest(void); void xen_get_runstate_snapshot(struct vcpu_runstate_info *res); int xen_setup_shutdown_event(void); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 75f98c5..8c4c6dc 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -490,16 +490,6 @@ void account_process_tick(struct task_struct *p, int user_tick) } /* - * Account multiple ticks of steal time. - * @p: the process from which the cpu time has been stolen - * @ticks: number of stolen ticks - */ -void account_steal_ticks(unsigned long ticks) -{ - account_steal_time(jiffies_to_cputime(ticks)); -} - -/* * Account multiple ticks of idle time. * @ticks: number of stolen ticks */ -- 2.6.6
_______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel