On Mon, Feb 27, 2017 at 05:11:34PM +0100, Paolo Bonzini wrote: > On 27/02/2017 16:59, Peter Zijlstra wrote:
> > Now, I've not yet figured out the ordering between when we set > > pv_time_ops.sched_clock and when we do the 'normal' TSC init stuff. > > I think the ordering is fine: > > - pv_time_ops.sched_clock is set here: > > start_kernel (init/main.c line 509) > setup_arch > kvmclock_init > kvm_sched_clock_init > > - TSC can be declared unstable only after this: > > start_kernel (init/main.c line 628) > late_time_init > tsc_init > > So by the time the tsc_cs_mark_unstable or mark_tsc_unstable can call > clear_sched_clock_stable, pv_time_ops.sched_clock has been set. > > > But it appears to me, we should not be calling > > clear_sched_clock_stable() on TSC bits when we don't end up using > > native_sched_clock(). > > Yes, this makes sense. Something like the below then (completely untested for now) has a chance of working. It does however change behaviour a little. I'm trying to debug something else; after that I'll give this a little more consideration. --- arch/x86/kernel/cpu/amd.c | 4 +--- arch/x86/kernel/cpu/centaur.c | 2 +- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/cpu/cyrix.c | 2 +- arch/x86/kernel/cpu/intel.c | 4 +--- arch/x86/kernel/cpu/transmeta.c | 2 +- arch/x86/kernel/tsc.c | 33 +++++++++++++++++++++------------ 7 files changed, 28 insertions(+), 23 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4e95b2e0d95f..bc3bbb6a8ab0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -555,10 +555,8 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); } else { - clear_sched_clock_stable(); + mark_tsc_unstable("not invariant"); } /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 2c234a6d94c4..0fdff183aa30 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -105,7 +105,7 @@ static void early_init_centaur(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_SYSENTER32); #endif - clear_sched_clock_stable(); + mark_tsc_unstable("not invariant"); } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f07005e6f461..2b7ff648ea25 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -86,7 +86,7 @@ static void default_init(struct cpuinfo_x86 *c) strcpy(c->x86_model_id, "386"); } #endif - clear_sched_clock_stable(); + mark_tsc_unstable("not invariant"); } static const struct cpu_dev default_cpu = { @@ -1076,7 +1076,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_init) this_cpu->c_init(c); else - clear_sched_clock_stable(); + mark_tsc_unstable("not invariant"); /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 47416f959a48..35057d67e864 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -184,7 +184,7 @@ static void early_init_cyrix(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); break; } - clear_sched_clock_stable(); + mark_tsc_unstable("not invariant"); } static void init_cyrix(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 017ecd3bb553..e0e192e43a4c 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -161,10 +161,8 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); } else { - clear_sched_clock_stable(); + mark_tsc_unstable("not invariant"); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index c1ea5b999839..fa243ffd1c84 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -16,7 +16,7 @@ static void early_init_transmeta(struct cpuinfo_x86 *c) c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001); } - clear_sched_clock_stable(); + mark_tsc_unstable("not invariant"); } static void init_transmeta(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2724dc82f992..bf6627aff54d 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -326,9 +326,16 @@ unsigned long long sched_clock(void) { return paravirt_sched_clock(); } + +static inline bool using_native_sched_clock(void) +{ + return pv_time_ops.sched_clock == native_sched_clock; +} #else unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); + +static inline bool using_native_sched_clock(void) { return true; } #endif int check_tsc_unstable(void) @@ -1112,7 +1119,8 @@ static void tsc_cs_mark_unstable(struct clocksource *cs) if (tsc_unstable) return; tsc_unstable = 1; - clear_sched_clock_stable(); + if (using_native_sched_clock()) + clear_sched_clock_stable(); disable_sched_clock_irqtime(); pr_info("Marking TSC unstable due to clocksource watchdog\n"); } @@ -1134,18 +1142,19 @@ static struct clocksource clocksource_tsc = { void mark_tsc_unstable(char *reason) { - if (!tsc_unstable) { - tsc_unstable = 1; + if (tsc_unstable) + return; + tsc_unstable = 1; + if (using_native_sched_clock()) clear_sched_clock_stable(); - disable_sched_clock_irqtime(); - pr_info("Marking TSC unstable due to %s\n", reason); - /* Change only the rating, when not registered */ - if (clocksource_tsc.mult) - clocksource_mark_unstable(&clocksource_tsc); - else { - clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; - clocksource_tsc.rating = 0; - } + disable_sched_clock_irqtime(); + pr_info("Marking TSC unstable due to %s\n", reason); + /* Change only the rating, when not registered */ + if (clocksource_tsc.mult) + clocksource_mark_unstable(&clocksource_tsc); + else { + clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; + clocksource_tsc.rating = 0; } }