Hi, Here is an initial diff, adapted from NetBSD, that synchronizes TSC clocks across cores.
CPU0 is the reference clock and all others are skewed. During CPU initialization the clocks synchronize by keeping a registry of each CPU clock skewness and adapting the TSC read routine accordingly. I choose this implementation over what FreeBSD is doing (which is just copying Linux really), because it is clean and elegant. I would love to hear reports from machines that were broken by this. Mine, which never exhibited the problem in the first place, run just fine with the following diff. In fact I am writting this message on one such machine. Also constructive comments are more than welcomed! Notes: - cpu_counter_serializing() could probably have a better name (tsc _read for example) - the PAUSE instruction is probably not needed - acpi(4) suspend and resume bits are left out on purpose, but should be trivial to add once the current diff settles Paul Irofti Index: arch/amd64/amd64/cpu.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v retrieving revision 1.137 diff -u -p -u -p -r1.137 cpu.c --- arch/amd64/amd64/cpu.c 28 May 2019 18:17:01 -0000 1.137 +++ arch/amd64/amd64/cpu.c 27 Jun 2019 11:55:08 -0000 @@ -96,6 +96,7 @@ #include <machine/gdt.h> #include <machine/pio.h> #include <machine/vmmvar.h> +#include <machine/tsc.h> #if NLAPIC > 0 #include <machine/i82489reg.h> @@ -754,6 +755,10 @@ cpu_init(struct cpu_info *ci) cr4 = rcr4(); lcr4(cr4 & ~CR4_PGE); lcr4(cr4); + + /* Synchronize TSC */ + if (!CPU_IS_PRIMARY(ci)) + tsc_sync_ap(ci); #endif } @@ -808,6 +813,7 @@ void cpu_start_secondary(struct cpu_info *ci) { int i; + u_long s; ci->ci_flags |= CPUF_AP; @@ -828,8 +834,20 @@ cpu_start_secondary(struct cpu_info *ci) printf("dropping into debugger; continue from here to resume boot\n"); db_enter(); #endif + } else { + /* + * Synchronize time stamp counters. Invalidate cache and do + * twice (in tsc_sync_bp) to minimize possible cache effects. + * Disable interrupts to try and rule out any external + * interference. + */ + s = intr_disable(); + wbinvd(); + tsc_sync_bp(ci); + intr_restore(s); } + if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) { atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY); @@ -852,6 +870,8 @@ void cpu_boot_secondary(struct cpu_info *ci) { int i; + int64_t drift; + u_long s; atomic_setbits_int(&ci->ci_flags, CPUF_GO); @@ -864,6 +884,17 @@ cpu_boot_secondary(struct cpu_info *ci) printf("dropping into debugger; continue from here to resume boot\n"); db_enter(); #endif + } else { + /* Synchronize TSC again, check for drift. */ + drift = ci->cpu_cc_skew; + s = intr_disable(); + wbinvd(); + tsc_sync_bp(ci); + intr_restore(s); + drift -= ci->cpu_cc_skew; + printf("TSC skew=%lld drift=%lld\n", + (long long)ci->cpu_cc_skew, (long long)drift); + tsc_sync_drift(drift); } } @@ -888,7 +919,13 @@ cpu_hatch(void *v) panic("%s: already running!?", ci->ci_dev->dv_xname); #endif + /* + * Synchronize the TSC for the first time. Note that interrupts are + * off at this point. + */ + wbinvd(); ci->ci_flags |= CPUF_PRESENT; + tsc_sync_ap(ci); lapic_enable(); lapic_startclock(); Index: arch/amd64/amd64/tsc.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v retrieving revision 1.11 diff -u -p -u -p -r1.11 tsc.c --- arch/amd64/amd64/tsc.c 6 Jun 2019 19:43:35 -0000 1.11 +++ arch/amd64/amd64/tsc.c 27 Jun 2019 11:55:08 -0000 @@ -1,8 +1,10 @@ /* $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $ */ /* + * Copyright (c) 2008 The NetBSD Foundation, Inc. * Copyright (c) 2016,2017 Reyk Floeter <r...@openbsd.org> * Copyright (c) 2017 Adam Steen <a...@adamsteen.com.au> * Copyright (c) 2017 Mike Belopuhov <m...@openbsd.org> + * Copyright (c) 2019 Paul Irofti <piro...@openbsd.org> * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -20,6 +22,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/timetc.h> +#include <sys/atomic.h> #include <machine/cpu.h> #include <machine/cpufunc.h> @@ -33,6 +36,13 @@ int tsc_recalibrate; uint64_t tsc_frequency; int tsc_is_invariant; +static int64_t tsc_drift_max = 250; /* max cycles */ +static int64_t tsc_drift_observed; +static bool tsc_good; + +static volatile int64_t tsc_sync_val; +static volatile struct cpu_info *tsc_sync_cpu; + uint tsc_get_timecount(struct timecounter *tc); struct timecounter tsc_timecounter = { @@ -172,10 +182,8 @@ calibrate_tsc_freq(void) return; tsc_frequency = freq; tsc_timecounter.tc_frequency = freq; -#ifndef MULTIPROCESSOR if (tsc_is_invariant) tsc_timecounter.tc_quality = 2000; -#endif } void @@ -208,12 +216,12 @@ tsc_timecounter_init(struct cpu_info *ci tsc_frequency = tsc_freq_cpuid(ci); tsc_is_invariant = 1; + tsc_good = (rdmsr(MSR_TSC) != 0 || rdmsr(MSR_TSC) != 0); + /* Newer CPUs don't require recalibration */ if (tsc_frequency > 0) { tsc_timecounter.tc_frequency = tsc_frequency; -#ifndef MULTIPROCESSOR tsc_timecounter.tc_quality = 2000; -#endif } else { tsc_recalibrate = 1; tsc_frequency = cpufreq; @@ -221,5 +229,116 @@ tsc_timecounter_init(struct cpu_info *ci calibrate_tsc_freq(); } + if (tsc_drift_observed > tsc_drift_max) { + printf("ERROR: %lld cycle TSC drift observed\n", + (long long)tsc_drift_observed); + tsc_timecounter.tc_quality = -100; + tsc_is_invariant = 0; + } + tc_init(&tsc_timecounter); +} + +static uint64_t +cpu_counter_serializing(struct cpu_info *ci) +{ + if (tsc_good) + return rdmsr(MSR_TSC); + else + return (rdtsc() + ci->cpu_cc_skew); +} + +/* + * Record drift (in clock cycles). Called during AP startup. + */ +void +tsc_sync_drift(int64_t drift) +{ + + if (drift < 0) + drift = -drift; + if (drift > tsc_drift_observed) + tsc_drift_observed = drift; +} + +/* + * Called during startup of APs, by the boot processor. Interrupts + * are disabled on entry. + */ +static void +tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp) +{ + uint64_t bptsc; + + if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL) { + panic("tsc_sync_bp: 1"); + } + + /* Flag it and read our TSC. */ + atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC); + bptsc = cpu_counter_serializing(ci) >> 1; + + /* Wait for remote to complete, and read ours again. */ + while ((ci->ci_flags & CPUF_SYNCTSC) != 0) { + membar_consumer(); + } + bptsc += (cpu_counter_serializing(ci) >> 1); + + /* Wait for the results to come in. */ + while (tsc_sync_cpu == ci) { + pause(); + } + if (tsc_sync_cpu != NULL) { + panic("tsc_sync_bp: 2"); + } + + *bptscp = bptsc; + *aptscp = tsc_sync_val; +} + +void +tsc_sync_bp(struct cpu_info *ci) +{ + uint64_t bptsc, aptsc; + + tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */ + tsc_read_bp(ci, &bptsc, &aptsc); + + /* Compute final value to adjust for skew. */ + ci->cpu_cc_skew = bptsc - aptsc; +} + +/* + * Called during startup of AP, by the AP itself. Interrupts are + * disabled on entry. + */ +static void +tsc_post_ap(struct cpu_info *ci) +{ + uint64_t tsc; + + /* Wait for go-ahead from primary. */ + while ((ci->ci_flags & CPUF_SYNCTSC) == 0) { + membar_consumer(); + } + tsc = (cpu_counter_serializing(ci) >> 1); + + /* Instruct primary to read its counter. */ + atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC); + tsc += (cpu_counter_serializing(ci) >> 1); + + /* Post result. Ensure the whole value goes out atomically. */ + (void)atomic_swap_64(&tsc_sync_val, tsc); + + if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci) { + panic("tsc_sync_ap"); + } +} + +void +tsc_sync_ap(struct cpu_info *ci) +{ + + tsc_post_ap(ci); + tsc_post_ap(ci); } Index: arch/amd64/include/cpu.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v retrieving revision 1.131 diff -u -p -u -p -r1.131 cpu.h --- arch/amd64/include/cpu.h 17 May 2019 19:07:16 -0000 1.131 +++ arch/amd64/include/cpu.h 27 Jun 2019 11:55:08 -0000 @@ -206,6 +206,8 @@ struct cpu_info { union vmm_cpu_cap ci_vmm_cap; paddr_t ci_vmxon_region_pa; struct vmxon_region *ci_vmxon_region; + + int64_t cpu_cc_skew; /* counter skew vs cpu0 */ }; #define CPUF_BSP 0x0001 /* CPU is the original BSP */ @@ -221,6 +223,7 @@ struct cpu_info { #define CPUF_INVAR_TSC 0x0100 /* CPU has invariant TSC */ #define CPUF_USERXSTATE 0x0200 /* CPU has curproc's xsave state */ +#define CPUF_SYNCTSC 0x0800 /* Synchronize TSC */ #define CPUF_PRESENT 0x1000 /* CPU is present */ #define CPUF_RUNNING 0x2000 /* CPU is running */ #define CPUF_PAUSE 0x4000 /* CPU is paused in DDB */ Index: arch/amd64/include/cpufunc.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/cpufunc.h,v retrieving revision 1.33 diff -u -p -u -p -r1.33 cpufunc.h --- arch/amd64/include/cpufunc.h 26 Mar 2019 19:32:46 -0000 1.33 +++ arch/amd64/include/cpufunc.h 27 Jun 2019 11:55:08 -0000 @@ -282,6 +282,11 @@ mfence(void) __asm volatile("mfence" : : : "memory"); } +static __inline void +pause(void) +{ + __asm volatile("pause" : : : "memory"); +} static __inline u_int64_t rdtsc(void) { Index: arch/amd64/include/tsc.h =================================================================== RCS file: arch/amd64/include/tsc.h diff -N arch/amd64/include/tsc.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ arch/amd64/include/tsc.h 27 Jun 2019 11:55:08 -0000 @@ -0,0 +1,25 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2019 Paul Irofti <piro...@openbsd.org> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _MACHINE_TSC_H_ +#define _MACHINE_TSC_H_ + +void tsc_sync_drift(int64_t); +void tsc_sync_bp(struct cpu_info *); +void tsc_sync_ap(struct cpu_info *); + +#endif /* !_MACHINE_TSC_H_ */