> Date: Thu, 27 Jun 2019 15:08:00 +0300
> From: Paul Irofti <p...@irofti.net>
> 
> Hi,
> 
> Here is an initial diff, adapted from NetBSD, that synchronizes TSC
> clocks across cores.
> 
> CPU0 is the reference clock and all others are skewed. During CPU
> initialization the clocks synchronize by keeping a registry of each CPU
> clock skewness and adapting the TSC read routine accordingly.
> 
> I chose this implementation over what FreeBSD is doing (which is just
> copying Linux really), because it is clean and elegant.
> 
> I would love to hear reports from machines that were broken by this.
> Mine, which never exhibited the problem in the first place, runs just
> fine with the following diff. In fact I am writing this message on one
> such machine.
> 
> Also, constructive comments are more than welcome!
> 
> Notes:
> 
> - cpu_counter_serializing() could probably have a better name
>   (tsc_read, for example)
> - the PAUSE instruction is probably not needed
> - acpi(4) suspend and resume bits are left out on purpose, but should
>   be trivial to add once the current diff settles
> 
> Paul Irofti

I don't think we want to introduce a <machine/tsc.h> header file.

The code suffers from some NetBSD-isms, so that'll need to be fixed.
I pointed some of them out below.

Also, how accurate is your skew detection?  What skew is detected on a
machine that (supposedly) has the TSCs in sync?  The result will be
that you actually slightly desync the counters on different CPUs.

I think Linux uses the TSC_ADJUST MSR and compares its value across
cores.  If the skew is small and the TSC_ADJUST values are the same
across cores it skips the TSC adjustments.

Cheers,

Mark

> 
> Index: arch/amd64/amd64/cpu.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
> retrieving revision 1.137
> diff -u -p -u -p -r1.137 cpu.c
> --- arch/amd64/amd64/cpu.c    28 May 2019 18:17:01 -0000      1.137
> +++ arch/amd64/amd64/cpu.c    27 Jun 2019 11:55:08 -0000
> @@ -96,6 +96,7 @@
>  #include <machine/gdt.h>
>  #include <machine/pio.h>
>  #include <machine/vmmvar.h>
> +#include <machine/tsc.h>
>  
>  #if NLAPIC > 0
>  #include <machine/i82489reg.h>
> @@ -754,6 +755,10 @@ cpu_init(struct cpu_info *ci)
>       cr4 = rcr4();
>       lcr4(cr4 & ~CR4_PGE);
>       lcr4(cr4);
> +
> +     /* Synchronize TSC */
> +     if (!CPU_IS_PRIMARY(ci))
> +           tsc_sync_ap(ci);
>  #endif
>  }
>  
> @@ -808,6 +813,7 @@ void
>  cpu_start_secondary(struct cpu_info *ci)
>  {
>       int i;
> +     u_long s;
>  
>       ci->ci_flags |= CPUF_AP;
>  
> @@ -828,8 +834,20 @@ cpu_start_secondary(struct cpu_info *ci)
>               printf("dropping into debugger; continue from here to resume 
> boot\n");
>               db_enter();
>  #endif
> +     } else {
> +             /*
> +              * Synchronize time stamp counters. Invalidate cache and do
> +              * twice (in tsc_sync_bp) to minimize possible cache effects.
> +              * Disable interrupts to try and rule out any external
> +              * interference.
> +              */
> +             s = intr_disable();
> +             wbinvd();
> +             tsc_sync_bp(ci);
> +             intr_restore(s);
>       }
>  
> +

Please don't introduce additional empty lines.

>       if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
>               atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY);
>  
> @@ -852,6 +870,8 @@ void
>  cpu_boot_secondary(struct cpu_info *ci)
>  {
>       int i;
> +     int64_t drift;
> +     u_long s;
>  
>       atomic_setbits_int(&ci->ci_flags, CPUF_GO);
>  
> @@ -864,6 +884,17 @@ cpu_boot_secondary(struct cpu_info *ci)
>               printf("dropping into debugger; continue from here to resume 
> boot\n");
>               db_enter();
>  #endif
> +     } else {
> +             /* Synchronize TSC again, check for drift. */
> +             drift = ci->cpu_cc_skew;
> +             s = intr_disable();
> +             wbinvd();
> +             tsc_sync_bp(ci);
> +             intr_restore(s);
> +             drift -= ci->cpu_cc_skew;
> +             printf("TSC skew=%lld drift=%lld\n",
> +                 (long long)ci->cpu_cc_skew, (long long)drift);
> +             tsc_sync_drift(drift);
>       }
>  }
>  
> @@ -888,7 +919,13 @@ cpu_hatch(void *v)
>               panic("%s: already running!?", ci->ci_dev->dv_xname);
>  #endif
>  
> +     /*
> +      * Synchronize the TSC for the first time. Note that interrupts are
> +      * off at this point.
> +      */
> +     wbinvd();
>       ci->ci_flags |= CPUF_PRESENT;
> +     tsc_sync_ap(ci);
>  
>       lapic_enable();
>       lapic_startclock();
> Index: arch/amd64/amd64/tsc.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
> retrieving revision 1.11
> diff -u -p -u -p -r1.11 tsc.c
> --- arch/amd64/amd64/tsc.c    6 Jun 2019 19:43:35 -0000       1.11
> +++ arch/amd64/amd64/tsc.c    27 Jun 2019 11:55:08 -0000
> @@ -1,8 +1,10 @@
>  /*   $OpenBSD: tsc.c,v 1.11 2019/06/06 19:43:35 kettenis Exp $       */
>  /*
> + * Copyright (c) 2008 The NetBSD Foundation, Inc.
>   * Copyright (c) 2016,2017 Reyk Floeter <r...@openbsd.org>
>   * Copyright (c) 2017 Adam Steen <a...@adamsteen.com.au>
>   * Copyright (c) 2017 Mike Belopuhov <m...@openbsd.org>
> + * Copyright (c) 2019 Paul Irofti <piro...@openbsd.org>
>   *
>   * Permission to use, copy, modify, and distribute this software for any
>   * purpose with or without fee is hereby granted, provided that the above
> @@ -20,6 +22,7 @@
>  #include <sys/param.h>
>  #include <sys/systm.h>
>  #include <sys/timetc.h>
> +#include <sys/atomic.h>
>  
>  #include <machine/cpu.h>
>  #include <machine/cpufunc.h>
> @@ -33,6 +36,13 @@ int                tsc_recalibrate;
>  uint64_t     tsc_frequency;
>  int          tsc_is_invariant;
>  
> +static int64_t       tsc_drift_max = 250;    /* max cycles */
> +static int64_t       tsc_drift_observed;
> +static bool  tsc_good;
> +
> +static volatile int64_t      tsc_sync_val;
> +static volatile struct cpu_info      *tsc_sync_cpu;
> +

No static in the kernel please.

>  uint         tsc_get_timecount(struct timecounter *tc);
>  
>  struct timecounter tsc_timecounter = {
> @@ -172,10 +182,8 @@ calibrate_tsc_freq(void)
>               return;
>       tsc_frequency = freq;
>       tsc_timecounter.tc_frequency = freq;
> -#ifndef MULTIPROCESSOR
>       if (tsc_is_invariant)
>               tsc_timecounter.tc_quality = 2000;
> -#endif
>  }
>  
>  void
> @@ -208,12 +216,12 @@ tsc_timecounter_init(struct cpu_info *ci
>       tsc_frequency = tsc_freq_cpuid(ci);
>       tsc_is_invariant = 1;
>  
> +     tsc_good = (rdmsr(MSR_TSC) != 0 || rdmsr(MSR_TSC) != 0);
> +
>       /* Newer CPUs don't require recalibration */
>       if (tsc_frequency > 0) {
>               tsc_timecounter.tc_frequency = tsc_frequency;
> -#ifndef MULTIPROCESSOR
>               tsc_timecounter.tc_quality = 2000;
> -#endif
>       } else {
>               tsc_recalibrate = 1;
>               tsc_frequency = cpufreq;
> @@ -221,5 +229,116 @@ tsc_timecounter_init(struct cpu_info *ci
>               calibrate_tsc_freq();
>       }
>  
> +     if (tsc_drift_observed > tsc_drift_max) {
> +             printf("ERROR: %lld cycle TSC drift observed\n",
> +                 (long long)tsc_drift_observed);
> +             tsc_timecounter.tc_quality = -100;
> +             tsc_is_invariant = 0;
> +     }
> +
>       tc_init(&tsc_timecounter);
> +}
> +
> +static uint64_t
> +cpu_counter_serializing(struct cpu_info *ci)
> +{
> +     if (tsc_good)
> +             return rdmsr(MSR_TSC);
> +     else
> +             return (rdtsc() + ci->cpu_cc_skew);
> +}
> +
> +/*
> + * Record drift (in clock cycles).  Called during AP startup.
> + */
> +void
> +tsc_sync_drift(int64_t drift)
> +{
> +

No blank line here.

> +     if (drift < 0)
> +             drift = -drift;
> +     if (drift > tsc_drift_observed)
> +             tsc_drift_observed = drift;
> +}
> +
> +/*
> + * Called during startup of APs, by the boot processor.  Interrupts
> + * are disabled on entry.
> + */
> +static void
> +tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, uint64_t *aptscp)
> +{
> +     uint64_t bptsc;
> +
> +     if (atomic_swap_ptr(&tsc_sync_cpu, ci) != NULL) {
> +             panic("tsc_sync_bp: 1");
> +     }
> +
> +     /* Flag it and read our TSC. */
> +     atomic_setbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> +     bptsc = cpu_counter_serializing(ci) >> 1;
> +
> +     /* Wait for remote to complete, and read ours again. */
> +     while ((ci->ci_flags & CPUF_SYNCTSC) != 0) {
> +             membar_consumer();
> +     }
> +     bptsc += (cpu_counter_serializing(ci) >> 1);
> +
> +     /* Wait for the results to come in. */
> +     while (tsc_sync_cpu == ci) {
> +             pause();

Maybe this should be CPU_BUSY_CYCLE()?

> +     }
> +     if (tsc_sync_cpu != NULL) {
> +             panic("tsc_sync_bp: 2");
> +     }

There is a bit of excessive use of curly braces in this function.

> +
> +     *bptscp = bptsc;
> +     *aptscp = tsc_sync_val;
> +}
> +
> +void
> +tsc_sync_bp(struct cpu_info *ci)
> +{
> +     uint64_t bptsc, aptsc;
> +
> +     tsc_read_bp(ci, &bptsc, &aptsc); /* discarded - cache effects */
> +     tsc_read_bp(ci, &bptsc, &aptsc);
> +
> +     /* Compute final value to adjust for skew. */
> +     ci->cpu_cc_skew = bptsc - aptsc;
> +}
> +
> +/*
> + * Called during startup of AP, by the AP itself.  Interrupts are
> + * disabled on entry.
> + */
> +static void
> +tsc_post_ap(struct cpu_info *ci)
> +{
> +     uint64_t tsc;
> +
> +     /* Wait for go-ahead from primary. */
> +     while ((ci->ci_flags & CPUF_SYNCTSC) == 0) {
> +             membar_consumer();
> +     }
> +     tsc = (cpu_counter_serializing(ci) >> 1);
> +
> +     /* Instruct primary to read its counter. */
> +     atomic_clearbits_int(&ci->ci_flags, CPUF_SYNCTSC);
> +     tsc += (cpu_counter_serializing(ci) >> 1);
> +
> +     /* Post result.  Ensure the whole value goes out atomically. */
> +     (void)atomic_swap_64(&tsc_sync_val, tsc);
> +
> +     if (atomic_swap_ptr(&tsc_sync_cpu, NULL) != ci) {
> +             panic("tsc_sync_ap");
> +     }
> +}
> +
> +void
> +tsc_sync_ap(struct cpu_info *ci)
> +{
> +
> +     tsc_post_ap(ci);
> +     tsc_post_ap(ci);
>  }
> Index: arch/amd64/include/cpu.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
> retrieving revision 1.131
> diff -u -p -u -p -r1.131 cpu.h
> --- arch/amd64/include/cpu.h  17 May 2019 19:07:16 -0000      1.131
> +++ arch/amd64/include/cpu.h  27 Jun 2019 11:55:08 -0000
> @@ -206,6 +206,8 @@ struct cpu_info {
>       union           vmm_cpu_cap ci_vmm_cap;
>       paddr_t         ci_vmxon_region_pa;
>       struct vmxon_region *ci_vmxon_region;
> +
> +     int64_t         cpu_cc_skew;            /* counter skew vs cpu0 */
>  };
>  
>  #define CPUF_BSP     0x0001          /* CPU is the original BSP */
> @@ -221,6 +223,7 @@ struct cpu_info {
>  #define CPUF_INVAR_TSC       0x0100          /* CPU has invariant TSC */
>  #define CPUF_USERXSTATE      0x0200          /* CPU has curproc's xsave 
> state */
>  
> +#define      CPUF_SYNCTSC    0x0800          /* Synchronize TSC */
>  #define CPUF_PRESENT 0x1000          /* CPU is present */
>  #define CPUF_RUNNING 0x2000          /* CPU is running */
>  #define CPUF_PAUSE   0x4000          /* CPU is paused in DDB */
> Index: arch/amd64/include/cpufunc.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/cpufunc.h,v
> retrieving revision 1.33
> diff -u -p -u -p -r1.33 cpufunc.h
> --- arch/amd64/include/cpufunc.h      26 Mar 2019 19:32:46 -0000      1.33
> +++ arch/amd64/include/cpufunc.h      27 Jun 2019 11:55:08 -0000
> @@ -282,6 +282,11 @@ mfence(void)
>       __asm volatile("mfence" : : : "memory");
>  }
>  
> +static __inline void
> +pause(void)
> +{
> +     __asm volatile("pause" : : : "memory");
> +}
>  static __inline u_int64_t
>  rdtsc(void)
>  {
> Index: arch/amd64/include/tsc.h
> ===================================================================
> RCS file: arch/amd64/include/tsc.h
> diff -N arch/amd64/include/tsc.h
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ arch/amd64/include/tsc.h  27 Jun 2019 11:55:08 -0000
> @@ -0,0 +1,25 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (c) 2019 Paul Irofti <piro...@openbsd.org>
> + *
> + * Permission to use, copy, modify, and/or distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#ifndef _MACHINE_TSC_H_
> +#define _MACHINE_TSC_H_
> +
> +void tsc_sync_drift(int64_t);
> +void tsc_sync_bp(struct cpu_info *);
> +void tsc_sync_ap(struct cpu_info *);
> +
> +#endif       /* !_MACHINE_TSC_H_ */
> 
> 

Reply via email to