Hi,

Here is an initial diff, adapted from NetBSD, that synchronizes TSC
clocks across cores.

CPU0 is the reference clock and all the others are measured against
it. During CPU initialization the kernel keeps a registry of each
CPU's clock skew relative to CPU0 and adapts the TSC read routine
accordingly.
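
To make it concrete, the adjusted read amounts to roughly the sketch
below. This is illustration only (the function name is made up); the
real routine is cpu_counter_serializing() in the diff, which prefers
reading the TSC MSR when it looks sane:

    /* Sketch: skew-adjusted TSC read; cpu_cc_skew is measured at boot. */
    uint64_t
    tsc_read_skewed(struct cpu_info *ci)
    {
            return rdtsc() + ci->cpu_cc_skew;
    }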

I chose this implementation over what FreeBSD does (which is really
just a copy of Linux), because it is clean and elegant.

I would love to hear reports from machines that used to be broken by
the TSC desync problem. Mine, which never exhibited the problem in the
first place, run just fine with the following diff. In fact I am
writing this message on one such machine.

Also, constructive comments are more than welcome!

Notes:

- cpu_counter_serializing() could probably have a better name
  (tsc_read() for example)
- the PAUSE instruction is probably not needed
- acpi(4) suspend and resume bits are left out on purpose, but should
  be trivial to add once the current diff settles (a rough sketch of
  one possible resume hook follows below)
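
For illustration, the resume bits could reuse the same handshake as
cpu_boot_secondary(); everything below is hypothetical except the
tsc_sync_bp()/tsc_sync_ap() calls, which are from this diff:

    /*
     * Hypothetical resume-side resync, NOT part of this diff.  The BP
     * would run this once per AP after wakeup, while the AP calls
     * tsc_sync_ap() with interrupts still off, exactly as at boot.
     */
    void
    tsc_resume_sync_bp(struct cpu_info *ci)
    {
            u_long s;

            s = intr_disable();
            wbinvd();               /* minimize cache effects, as at boot */
            tsc_sync_bp(ci);
            intr_restore(s);
    }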

Paul Irofti

Index: arch/amd64/amd64/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.137
diff -u -p -u -p -r1.137 cpu.c
--- arch/amd64/amd64/cpu.c      28 May 2019 18:17:01 -0000      1.137
+++ arch/amd64/amd64/cpu.c      27 Jun 2019 11:55:08 -0000
@@ -96,6 +96,7 @@
 #include <machine/gdt.h>
 #include <machine/pio.h>
 #include <machine/vmmvar.h>
+#include <machine/tsc.h>
 
 #if NLAPIC > 0
 #include <machine/i82489reg.h>
@@ -754,6 +755,10 @@ cpu_init(struct cpu_info *ci)
        cr4 = rcr4();
        lcr4(cr4 & ~CR4_PGE);
        lcr4(cr4);
+
+       /* Synchronize TSC */
+       if (!CPU_IS_PRIMARY(ci))
+             tsc_sync_ap(ci);
 #endif
 }
 
@@ -808,6 +813,7 @@ void
 cpu_start_secondary(struct cpu_info *ci)
 {
        int i;
+       u_long s;
 
        ci->ci_flags |= CPUF_AP;
 
@@ -828,8 +834,20 @@ cpu_start_secondary(struct cpu_info *ci)
                printf("dropping into debugger; continue from here to resume 
boot\n");
                db_enter();
 #endif
+       } else {
+               /*
+                * Synchronize time stamp counters. Invalidate cache and do
+                * twice (in tsc_sync_bp) to minimize possible cache effects.
+                * Disable interrupts to try and rule out any external
+                * interference.
+                */
+               s = intr_disable();
+               wbinvd();
+               tsc_sync_bp(ci);
+               intr_restore(s);
        }
 
+
        if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
                atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY);
 
@@ -852,6 +870,8 @@ void
 cpu_boot_secondary(struct cpu_info *ci)
 {
        int i;
+       int64_t drift;
+       u_long s;
 
        atomic_setbits_int(&ci->ci_flags, CPUF_GO);
 
@@ -864,6 +884,17 @@ cpu_boot_secondary(struct cpu_info *ci)
                printf("dropping into debugger; continue from here to resume 
boot\n");
                db_enter();
 #endif
+       } else {
+               /* Synchronize TSC again, check for drift. */
+               drift = ci->cpu_cc_skew;
+               s = intr_disable();
+               wbinvd();
+               tsc_sync_bp(ci);
+               intr_restore(s);
+               drift -= ci->cpu_cc_skew;
+               printf("TSC skew=%lld drift=%lld\n",
+                   (long long)ci->cpu_cc_skew, (long long)drift);
+               tsc_sync_drift(drift);
        }
 }
 
@@ -888,7 +919,13 @@ cpu_hatch(void *v)
                panic("%s: already running!?", ci->ci_dev->dv_xname);
 #endif
 
+       /*
+        * Synchronize the TSC for the first time. Note that interrupts are
+        * off at this point.
+        */
+       wbinvd();
        ci->ci_flags |= CPUF_PRESENT;
+       tsc_sync_ap(ci);
 
        lapic_enable();
        lapic_startclock();
Index: arch/amd64/amd64/tsc.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
retrieving revision 1.11
diff -u -p -u -p -r1.11 tsc.c
--- arch/amd64/amd64/tsc.c      6 Jun 2019 19:43:35 -0000       1.11
+++ arch/amd64/amd64/tsc.c      27 Jun 2019 11:55:08 -0000
@@ -1,8 +1,10 @@
 /*     $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $       */
 /*
+ * Copyright (c) 2008 The NetBSD Foundation, Inc.
  * Copyright (c) 2016,2017 Reyk Floeter <r...@openbsd.org>
  * Copyright (c) 2017 Adam Steen <a...@adamsteen.com.au>
  * Copyright (c) 2017 Mike Belopuhov <m...@openbsd.org>
+ * Copyright (c) 2019 Paul Irofti <piro...@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -20,6 +22,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/timetc.h>
+#include <sys/atomic.h>
 
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
@@ -33,6 +36,13 @@ int          tsc_recalibrate;
 uint64_t       tsc_frequency;
 int            tsc_is_invariant;
 
+static int64_t tsc_drift_max = 250;    /* max cycles */
+static int64_t tsc_drift_observed;
+static bool    tsc_good;
+
+static volatile int64_t        tsc_sync_val;
+static volatile struct cpu_info        *tsc_sync_cpu;
+
 uint           tsc_get_timecount(struct timecounter *tc);
 
 struct timecounter tsc_timecounter = {
@@ -172,10 +182,8 @@ calibrate_tsc_freq(void)
                return;
        tsc_frequency = freq;
        tsc_timecounter.tc_frequency = freq;
-#ifndef MULTIPROCESSOR
        if (tsc_is_invariant)
                tsc_timecounter.tc_quality = 2000;
-#endif
 }
 
 void
@@ -208,12 +216,12 @@ tsc_timecounter_init(struct cpu_info *ci
        tsc_frequency = tsc_freq_cpuid(ci);
        tsc_is_invariant = 1;
 
+       tsc_good = (rdmsr(MSR_TSC) != 0 || rdmsr(MSR_TSC) != 0);
+
        /* Newer CPUs don't require recalibration */
        if (tsc_frequency > 0) {
                tsc_timecounter.tc_frequency = tsc_frequency;
-#ifndef MULTIPROCESSOR
                tsc_timecounter.tc_quality = 2000;
-#endif
        } else {
                tsc_recalibrate = 1;
                tsc_frequency = cpufreq;
@@ -221,5 +229,116 @@ tsc_timecounter_init(struct cpu_info *ci
                calibrate_tsc_freq();
        }
 
+       if (tsc_drift_observed > tsc_drift_max) {
+               printf("ERROR: %lld cycle TSC drift observed\n",
+                   (long long)tsc_drift_observed);
+               tsc_timecounter.tc_quality = -100;
+               tsc_is_invariant = 0;
+       }
+
        tc_init(&tsc_timecounter);
+}
+
+static uint64_t
+cpu_counter_serializing(struct cpu_info *ci)
+{
+       if (tsc_good)
+               return rdmsr(MSR_TSC);
+       else
+               return (rdtsc() + ci->cpu_cc_skew);
+}
+
+/*
+ * Record drift (in clock cycles).  Called during AP startup.
+ */
+void
+tsc_sync_drift(int64_t drift)
+{
+
+       if (drift < 0)
+               drift = -drift;
+       if (drift > tsc_drift_observed)
+               tsc_drift_observed = drift;
+}
+
+/*
+ * Called during startup of APs, by the boot processor.  Interrupts
+ * are disabled on entry.
+ */
+static void
+tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
+{
+       uint64_t bptsc;
+
+       if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL) {
+               panic("tsc_sync_bp: 1");
+       }
+
+       /* Flag it and read our TSC. */
+       atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
+       bptsc = cpu_counter_serializing(ci) >> 1;
+
+       /* Wait for remote to complete, and read ours again. */
+       while ((ci->ci_flags & CPUF_SYNCTSC) != 0) {
+               membar_consumer();
+       }
+       bptsc += (cpu_counter_serializing(ci) >> 1);
+
+       /* Wait for the results to come in. */
+       while (tsc_sync_cpu == ci) {
+               pause();
+       }
+       if (tsc_sync_cpu != NULL) {
+               panic("tsc_sync_bp: 2");
+       }
+
+       *bptscp = bptsc;
+       *aptscp = tsc_sync_val;
+}
+
+void
+tsc_sync_bp(struct cpu_info *ci)
+{
+       uint64_t bptsc, aptsc;
+
+       tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
+       tsc_read_bp(ci, &bptsc, &aptsc);
+
+       /* Compute final value to adjust for skew. */
+       ci->cpu_cc_skew = bptsc - aptsc;
+}
+
+/*
+ * Called during startup of AP, by the AP itself.  Interrupts are
+ * disabled on entry.
+ */
+static void
+tsc_post_ap(struct cpu_info *ci)
+{
+       uint64_t tsc;
+
+       /* Wait for go-ahead from primary. */
+       while ((ci->ci_flags & CPUF_SYNCTSC) == 0) {
+               membar_consumer();
+       }
+       tsc = (cpu_counter_serializing(ci) >> 1);
+
+       /* Instruct primary to read its counter. */
+       atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
+       tsc += (cpu_counter_serializing(ci) >> 1);
+
+       /* Post result.  Ensure the whole value goes out atomically. */
+       (void)atomic_swap_64(&tsc_sync_val, tsc);
+
+       if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci) {
+               panic("tsc_sync_ap");
+       }
+}
+
+void
+tsc_sync_ap(struct cpu_info *ci)
+{
+
+       tsc_post_ap(ci);
+       tsc_post_ap(ci);
 }
Index: arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.131
diff -u -p -u -p -r1.131 cpu.h
--- arch/amd64/include/cpu.h    17 May 2019 19:07:16 -0000      1.131
+++ arch/amd64/include/cpu.h    27 Jun 2019 11:55:08 -0000
@@ -206,6 +206,8 @@ struct cpu_info {
        union           vmm_cpu_cap ci_vmm_cap;
        paddr_t         ci_vmxon_region_pa;
        struct vmxon_region *ci_vmxon_region;
+
+       int64_t         cpu_cc_skew;            /* counter skew vs cpu0 */
 };
 
 #define CPUF_BSP       0x0001          /* CPU is the original BSP */
@@ -221,6 +223,7 @@ struct cpu_info {
 #define CPUF_INVAR_TSC 0x0100          /* CPU has invariant TSC */
 #define CPUF_USERXSTATE        0x0200          /* CPU has curproc's xsave state */
 
+#define        CPUF_SYNCTSC    0x0800          /* Synchronize TSC */
 #define CPUF_PRESENT   0x1000          /* CPU is present */
 #define CPUF_RUNNING   0x2000          /* CPU is running */
 #define CPUF_PAUSE     0x4000          /* CPU is paused in DDB */
Index: arch/amd64/include/cpufunc.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpufunc.h,v
retrieving revision 1.33
diff -u -p -u -p -r1.33 cpufunc.h
--- arch/amd64/include/cpufunc.h        26 Mar 2019 19:32:46 -0000      1.33
+++ arch/amd64/include/cpufunc.h        27 Jun 2019 11:55:08 -0000
@@ -282,6 +282,11 @@ mfence(void)
        __asm volatile("mfence" : : : "memory");
 }
 
+static __inline void
+pause(void)
+{
+       __asm volatile("pause" : : : "memory");
+}
 static __inline u_int64_t
 rdtsc(void)
 {
Index: arch/amd64/include/tsc.h
===================================================================
RCS file: arch/amd64/include/tsc.h
diff -N arch/amd64/include/tsc.h
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ arch/amd64/include/tsc.h    27 Jun 2019 11:55:08 -0000
@@ -0,0 +1,25 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2019 Paul Irofti <piro...@openbsd.org>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _MACHINE_TSC_H_
+#define _MACHINE_TSC_H_
+
+void tsc_sync_drift(int64_t);
+void tsc_sync_bp(struct cpu_info *);
+void tsc_sync_ap(struct cpu_info *);
+
+#endif /* !_MACHINE_TSC_H_ */
