> Date: Thu, 27 Jun 2019 15:08:00 +0300
> From: Paul Irofti <p...@irofti.net>
>
> Hi,
>
> Here is an initial diff, adapted from NetBSD, that synchronizes the
> TSC clocks across cores.
>
> CPU0 is the reference clock and all others are skewed. During CPU
> initialization the clocks synchronize by keeping a registry of each
> CPU clock's skew and adapting the TSC read routine accordingly.
>
> I chose this implementation over what FreeBSD is doing (which is just
> copying Linux, really) because it is clean and elegant.
>
> I would love to hear reports from machines that were broken by this.
> Mine, which never exhibited the problem in the first place, run just
> fine with the following diff. In fact I am writing this message on
> one such machine.
>
> Also, constructive comments are more than welcome!
>
> Notes:
>
> - cpu_counter_serializing() could probably have a better name
>   (tsc_read for example)
> - the PAUSE instruction is probably not needed
> - acpi(4) suspend and resume bits are left out on purpose, but should
>   be trivial to add once the current diff settles
>
> Paul Irofti
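For reference, the reader-visible core of the approach is nothing more than a per-CPU offset applied on every TSC read. A minimal sketch (the function name here is made up; cpu_cc_skew is the field the diff below adds to struct cpu_info):

	/*
	 * Sketch only, not part of the diff: CPU0 keeps cpu_cc_skew == 0,
	 * every AP stores the offset measured against CPU0 at boot, so a
	 * read on any CPU lands on CPU0's timeline.
	 */
	static __inline uint64_t
	tsc_read_skewed(struct cpu_info *ci)
	{
		return rdtsc() + ci->cpu_cc_skew;
	}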
I don't think we want to introduce a <machine/tsc.h> header file.

The code suffers from some NetBSD-isms, so those will need to be fixed.
I pointed some of them out below.

Also, how accurate is your skew detection? What skew is detected on a
machine that (supposedly) already has the TSCs in sync? If it is
non-zero, the result will be that you actually slightly desync the
counters on the different CPUs.

I think Linux uses the TSC_ADJUST MSR and compares its value across
cores. If the skew is small and the TSC_ADJUST values are the same
across cores, it skips the TSC adjustments.
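Roughly along the lines of the sketch below. The names are placeholders, since OpenBSD currently has no MSR_TSC_ADJUST define; IA32_TSC_ADJUST is architectural MSR 0x3b, advertised by CPUID leaf 7, and TSC_SKEW_NOISE is an invented threshold:

	/* Placeholder define; IA32_TSC_ADJUST is architectural MSR 0x3b. */
	#define MSR_TSC_ADJUST	0x03b
	/* Invented threshold: skews below this are treated as noise. */
	#define TSC_SKEW_NOISE	100

	/*
	 * Sketch: leave the TSCs alone when firmware programmed the same
	 * TSC_ADJUST value on this core as on the BSP and the measured
	 * skew is within the noise threshold.
	 */
	int
	tsc_adjust_in_sync(uint64_t bsp_adjust, int64_t skew)
	{
		return (rdmsr(MSR_TSC_ADJUST) == bsp_adjust &&
		    skew > -TSC_SKEW_NOISE && skew < TSC_SKEW_NOISE);
	}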
Cheers,

Mark

> Index: arch/amd64/amd64/cpu.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
> retrieving revision 1.137
> diff -u -p -u -p -r1.137 cpu.c
> --- arch/amd64/amd64/cpu.c	28 May 2019 18:17:01 -0000	1.137
> +++ arch/amd64/amd64/cpu.c	27 Jun 2019 11:55:08 -0000
> @@ -96,6 +96,7 @@
>  #include <machine/gdt.h>
>  #include <machine/pio.h>
>  #include <machine/vmmvar.h>
> +#include <machine/tsc.h>
>  
>  #if NLAPIC > 0
>  #include <machine/i82489reg.h>
> @@ -754,6 +755,10 @@ cpu_init(struct cpu_info *ci)
>  	cr4 = rcr4();
>  	lcr4(cr4 & ~CR4_PGE);
>  	lcr4(cr4);
> +
> +	/* Synchronize TSC */
> +	if (!CPU_IS_PRIMARY(ci))
> +		tsc_sync_ap(ci);
>  #endif
>  }
>  
> @@ -808,6 +813,7 @@ void
>  cpu_start_secondary(struct cpu_info *ci)
>  {
>  	int i;
> +	u_long s;
>  
>  	ci->ci_flags |= CPUF_AP;
>  
> @@ -828,8 +834,20 @@ cpu_start_secondary(struct cpu_info *ci)
>  		printf("dropping into debugger; continue from here to resume boot\n");
>  		db_enter();
>  #endif
> +	} else {
> +		/*
> +		 * Synchronize time stamp counters. Invalidate cache and do
> +		 * twice (in tsc_sync_bp) to minimize possible cache effects.
> +		 * Disable interrupts to try and rule out any external
> +		 * interference.
> +		 */
> +		s = intr_disable();
> +		wbinvd();
> +		tsc_sync_bp(ci);
> +		intr_restore(s);
>  	}
>  
> +

Please don't introduce additional empty lines.

>  	if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
>  		atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY);
>  
> @@ -852,6 +870,8 @@ void
>  cpu_boot_secondary(struct cpu_info *ci)
>  {
>  	int i;
> +	int64_t drift;
> +	u_long s;
>  
>  	atomic_setbits_int(&ci->ci_flags, CPUF_GO);
>  
> @@ -864,6 +884,17 @@ cpu_boot_secondary(struct cpu_info *ci)
>  		printf("dropping into debugger; continue from here to resume boot\n");
>  		db_enter();
>  #endif
> +	} else {
> +		/* Synchronize TSC again, check for drift. */
> +		drift = ci->cpu_cc_skew;
> +		s = intr_disable();
> +		wbinvd();
> +		tsc_sync_bp(ci);
> +		intr_restore(s);
> +		drift -= ci->cpu_cc_skew;
> +		printf("TSC skew=%lld drift=%lld\n",
> +		    (long long)ci->cpu_cc_skew, (long long)drift);
> +		tsc_sync_drift(drift);
>  	}
>  }
>  
> @@ -888,7 +919,13 @@ cpu_hatch(void *v)
>  		panic("%s: already running!?", ci->ci_dev->dv_xname);
>  #endif
>  
> +	/*
> +	 * Synchronize the TSC for the first time. Note that interrupts are
> +	 * off at this point.
> +	 */
> +	wbinvd();
>  	ci->ci_flags |= CPUF_PRESENT;
> +	tsc_sync_ap(ci);
>  
>  	lapic_enable();
>  	lapic_startclock();
> Index: arch/amd64/amd64/tsc.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
> retrieving revision 1.11
> diff -u -p -u -p -r1.11 tsc.c
> --- arch/amd64/amd64/tsc.c	6 Jun 2019 19:43:35 -0000	1.11
> +++ arch/amd64/amd64/tsc.c	27 Jun 2019 11:55:08 -0000
> @@ -1,8 +1,10 @@
>  /*	$OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $	*/
>  /*
> + * Copyright (c) 2008 The NetBSD Foundation, Inc.
>   * Copyright (c) 2016,2017 Reyk Floeter <r...@openbsd.org>
>   * Copyright (c) 2017 Adam Steen <a...@adamsteen.com.au>
>   * Copyright (c) 2017 Mike Belopuhov <m...@openbsd.org>
> + * Copyright (c) 2019 Paul Irofti <piro...@openbsd.org>
>   *
>   * Permission to use, copy, modify, and distribute this software for any
>   * purpose with or without fee is hereby granted, provided that the above
> @@ -20,6 +22,7 @@
>  #include <sys/param.h>
>  #include <sys/systm.h>
>  #include <sys/timetc.h>
> +#include <sys/atomic.h>
>  
>  #include <machine/cpu.h>
>  #include <machine/cpufunc.h>
> @@ -33,6 +36,13 @@ int	tsc_recalibrate;
>  uint64_t	tsc_frequency;
>  int		tsc_is_invariant;
>  
> +static int64_t	tsc_drift_max = 250;	/* max cycles */
> +static int64_t	tsc_drift_observed;
> +static bool	tsc_good;
> +
> +static volatile int64_t	tsc_sync_val;
> +static volatile struct cpu_info	*tsc_sync_cpu;
> +

No static in the kernel please.

>  uint		tsc_get_timecount(struct timecounter *tc);
>  
>  struct timecounter tsc_timecounter = {
> @@ -172,10 +182,8 @@ calibrate_tsc_freq(void)
>  		return;
>  	tsc_frequency = freq;
>  	tsc_timecounter.tc_frequency = freq;
> -#ifndef MULTIPROCESSOR
>  	if (tsc_is_invariant)
>  		tsc_timecounter.tc_quality = 2000;
> -#endif
>  }
>  
>  void
> @@ -208,12 +216,12 @@ tsc_timecounter_init(struct cpu_info *ci
>  		tsc_frequency = tsc_freq_cpuid(ci);
>  		tsc_is_invariant = 1;
>  
> +	tsc_good = (rdmsr(MSR_TSC) != 0 || rdmsr(MSR_TSC) != 0);
> +
>  	/* Newer CPUs don't require recalibration */
>  	if (tsc_frequency > 0) {
>  		tsc_timecounter.tc_frequency = tsc_frequency;
> -#ifndef MULTIPROCESSOR
>  		tsc_timecounter.tc_quality = 2000;
> -#endif
>  	} else {
>  		tsc_recalibrate = 1;
>  		tsc_frequency = cpufreq;
> @@ -221,5 +229,116 @@ tsc_timecounter_init(struct cpu_info *ci
>  		calibrate_tsc_freq();
>  	}
>  
> +	if (tsc_drift_observed > tsc_drift_max) {
> +		printf("ERROR: %lld cycle TSC drift observed\n",
> +		    (long long)tsc_drift_observed);
> +		tsc_timecounter.tc_quality = -100;
> +		tsc_is_invariant = 0;
> +	}
> +
>  	tc_init(&tsc_timecounter);
> +}
> +
> +static uint64_t
> +cpu_counter_serializing(struct cpu_info *ci)
> +{
> +	if (tsc_good)
> +		return rdmsr(MSR_TSC);
> +	else
> +		return (rdtsc() + ci->cpu_cc_skew);
> +}
> +
> +/*
> + * Record drift (in clock cycles). Called during AP startup.
> + */
> +void
> +tsc_sync_drift(int64_t drift)
> +{
> +

No blank line here.

> +	if (drift < 0)
> +		drift = -drift;
> +	if (drift > tsc_drift_observed)
> +		tsc_drift_observed = drift;
> +}
> +
> +/*
> + * Called during startup of APs, by the boot processor. Interrupts
> + * are disabled on entry.
> + */
> +static void
> +tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
> +{
> +	uint64_t bptsc;
> +
> +	if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL) {
> +		panic("tsc_sync_bp: 1");
> +	}
> +
> +	/* Flag it and read our TSC. */
> +	atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> +	bptsc = cpu_counter_serializing(ci) >> 1;
> +
> +	/* Wait for remote to complete, and read ours again. */
> +	while ((ci->ci_flags & CPUF_SYNCTSC) != 0) {
> +		membar_consumer();
> +	}
> +	bptsc += (cpu_counter_serializing(ci) >> 1);
> +
> +	/* Wait for the results to come in. */
> +	while (tsc_sync_cpu == ci) {
> +		pause();

Maybe this should be CPU_BUSY_CYCLE()?

> +	}
> +	if (tsc_sync_cpu != NULL) {
> +		panic("tsc_sync_bp: 2");
> +	}

There is a bit of excessive use of curly braces in this function.
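With CPU_BUSY_CYCLE(), which already expands to the same "pause" instruction on amd64, the spin loop above would become the sketch below, and the new pause() wrapper added to cpufunc.h further down would be unnecessary:

	/* Wait for the results to come in. */
	while (tsc_sync_cpu == ci)
		CPU_BUSY_CYCLE();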
> +
> +	*bptscp = bptsc;
> +	*aptscp = tsc_sync_val;
> +}
> +
> +void
> +tsc_sync_bp(struct cpu_info *ci)
> +{
> +	uint64_t bptsc, aptsc;
> +
> +	tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
> +	tsc_read_bp(ci, &bptsc, &aptsc);
> +
> +	/* Compute final value to adjust for skew. */
> +	ci->cpu_cc_skew = bptsc - aptsc;
> +}
> +
> +/*
> + * Called during startup of AP, by the AP itself. Interrupts are
> + * disabled on entry.
> + */
> +static void
> +tsc_post_ap(struct cpu_info *ci)
> +{
> +	uint64_t tsc;
> +
> +	/* Wait for go-ahead from primary. */
> +	while ((ci->ci_flags & CPUF_SYNCTSC) == 0) {
> +		membar_consumer();
> +	}
> +	tsc = (cpu_counter_serializing(ci) >> 1);
> +
> +	/* Instruct primary to read its counter. */
> +	atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> +	tsc += (cpu_counter_serializing(ci) >> 1);
> +
> +	/* Post result. Ensure the whole value goes out atomically. */
> +	(void)atomic_swap_64(&tsc_sync_val, tsc);
> +
> +	if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci) {
> +		panic("tsc_sync_ap");
> +	}
> +}
> +
> +void
> +tsc_sync_ap(struct cpu_info *ci)
> +{
> +
> +	tsc_post_ap(ci);
> +	tsc_post_ap(ci);
> }
> Index: arch/amd64/include/cpu.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
> retrieving revision 1.131
> diff -u -p -u -p -r1.131 cpu.h
> --- arch/amd64/include/cpu.h	17 May 2019 19:07:16 -0000	1.131
> +++ arch/amd64/include/cpu.h	27 Jun 2019 11:55:08 -0000
> @@ -206,6 +206,8 @@ struct cpu_info {
>  	union		vmm_cpu_cap ci_vmm_cap;
>  	paddr_t		ci_vmxon_region_pa;
>  	struct vmxon_region *ci_vmxon_region;
> +
> +	int64_t		cpu_cc_skew;	/* counter skew vs cpu0 */
>  };
>  
>  #define CPUF_BSP	0x0001		/* CPU is the original BSP */
> @@ -221,6 +223,7 @@ struct cpu_info {
>  #define CPUF_INVAR_TSC	0x0100		/* CPU has invariant TSC */
>  #define CPUF_USERXSTATE	0x0200		/* CPU has curproc's xsave state */
>  
> +#define CPUF_SYNCTSC	0x0800		/* Synchronize TSC */
>  #define CPUF_PRESENT	0x1000		/* CPU is present */
>  #define CPUF_RUNNING	0x2000		/* CPU is running */
>  #define CPUF_PAUSE	0x4000		/* CPU is paused in DDB */
> Index: arch/amd64/include/cpufunc.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpufunc.h,v
> retrieving revision 1.33
> diff -u -p -u -p -r1.33 cpufunc.h
> --- arch/amd64/include/cpufunc.h	26 Mar 2019 19:32:46 -0000	1.33
> +++ arch/amd64/include/cpufunc.h	27 Jun 2019 11:55:08 -0000
> @@ -282,6 +282,11 @@ mfence(void)
>  	__asm volatile("mfence" : : : "memory");
>  }
>  
> +static __inline void
> +pause(void)
> +{
> +	__asm volatile("pause" : : : "memory");
> +}
>  static __inline u_int64_t
>  rdtsc(void)
>  {
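To make the averaging in tsc_sync_bp() and tsc_post_ap() above concrete, with made-up numbers: suppose the BP's two reads are 2000 (before the AP acknowledges) and 2400 (after), and the AP's two reads are 2050 and 2150. Each read is halved before summing, so bptsc = 2000/2 + 2400/2 = 2200 and aptsc = 2050/2 + 2150/2 = 2100. Then cpu_cc_skew = bptsc - aptsc = +100: the AP's counter is roughly 100 cycles behind, and every later read on that AP adds +100 so that it lands back on CPU0's timeline.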
> Index: arch/amd64/include/tsc.h
> ===================================================================
> RCS file: arch/amd64/include/tsc.h
> diff -N arch/amd64/include/tsc.h
> --- /dev/null	1 Jan 1970 00:00:00 -0000
> +++ arch/amd64/include/tsc.h	27 Jun 2019 11:55:08 -0000
> @@ -0,0 +1,25 @@
> +/*	$OpenBSD$	*/
> +/*
> + * Copyright (c) 2019 Paul Irofti <piro...@openbsd.org>
> + *
> + * Permission to use, copy, modify, and/or distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#ifndef _MACHINE_TSC_H_
> +#define _MACHINE_TSC_H_
> +
> +void tsc_sync_drift(int64_t);
> +void tsc_sync_bp(struct cpu_info *);
> +void tsc_sync_ap(struct cpu_info *);
> +
> +#endif /* !_MACHINE_TSC_H_ */