On Fri, Apr 18 2025 at 08:37, Thomas Gleixner wrote:
> On Thu, Apr 17 2025 at 17:46, John Stultz wrote:
>> Instead it seems like we should just do:
>>   tk->coarse_nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
>
> You end up with the same problem again because xtime_nsec can move
> backwards when the multiplier is updated, no?

Something like the below should work.

Thanks,

        tglx
---
 include/linux/timekeeper_internal.h |   10 ++++-
 kernel/time/timekeeping.c           |   62 ++++++++++++++++++++++++++++++++----
 kernel/time/vsyscall.c              |    4 +-
 3 files changed, 65 insertions(+), 11 deletions(-)

--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -51,7 +51,7 @@ struct tk_read_base {
  * @offs_real:                 Offset clock monotonic -> clock realtime
  * @offs_boot:                 Offset clock monotonic -> clock boottime
  * @offs_tai:                  Offset clock monotonic -> clock tai
- * @tai_offset:                        The current UTC to TAI offset in seconds
+ * @coarse_nsec:               The nanoseconds part for coarse time getters
  * @tkr_raw:                   The readout base structure for CLOCK_MONOTONIC_RAW
  * @raw_sec:                   CLOCK_MONOTONIC_RAW  time in seconds
  * @clock_was_set_seq:         The sequence number of clock was set events
@@ -76,6 +76,8 @@ struct tk_read_base {
  *                             ntp shifted nano seconds.
  * @ntp_err_mult:              Multiplication factor for scaled math conversion
  * @skip_second_overflow:      Flag used to avoid updating NTP twice with same second
+ * @tai_offset:                        The current UTC to TAI offset in seconds
+ * @coarse_offset:             The offset of the coarse timekeeper in clock cycles
  *
  * Note: For timespec(64) based interfaces wall_to_monotonic is what
  * we need to add to xtime (or xtime corrected for sub jiffy times)
@@ -100,7 +102,7 @@ struct tk_read_base {
  * which results in the following cacheline layout:
  *
  * 0:  seqcount, tkr_mono
- * 1:  xtime_sec ... tai_offset
+ * 1:  xtime_sec ... coarse_nsec
  * 2:  tkr_raw, raw_sec
  * 3,4: Internal variables
  *
@@ -121,7 +123,7 @@ struct timekeeper {
        ktime_t                 offs_real;
        ktime_t                 offs_boot;
        ktime_t                 offs_tai;
-       s32                     tai_offset;
+       u32                     coarse_nsec;
 
        /* Cacheline 2: */
        struct tk_read_base     tkr_raw;
@@ -144,6 +146,8 @@ struct timekeeper {
        u32                     ntp_error_shift;
        u32                     ntp_err_mult;
        u32                     skip_second_overflow;
+       s32                     tai_offset;
+       u32                     coarse_offset;
 };
 
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -164,6 +164,15 @@ static inline struct timespec64 tk_xtime
        return ts;
 }
 
+static inline struct timespec64 tk_xtime_coarse(const struct timekeeper *tk)
+{
+       struct timespec64 ts;
+
+       ts.tv_sec = tk->xtime_sec;
+       ts.tv_nsec = tk->coarse_nsec;
+       return ts;
+}
+
 static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
 {
        tk->xtime_sec = ts->tv_sec;
@@ -252,6 +261,7 @@ static void tk_setup_internals(struct ti
        tk->tkr_raw.clock = clock;
        tk->tkr_raw.mask = clock->mask;
        tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
+       tk->coarse_offset = 0;
 
        /* Do the ns -> cycle conversion first, using original mult */
        tmp = NTP_INTERVAL_LENGTH;
@@ -636,6 +646,34 @@ static void timekeeping_restore_shadow(s
        memcpy(&tkd->shadow_timekeeper, &tkd->timekeeper, sizeof(tkd->timekeeper));
 }
 
+/*
+ * Update the nanoseconds part for the coarse time keepers. They can't rely
+ * on xtime_nsec because xtime_nsec is adjusted when the multiplication
+ * factor of the clock is adjusted. See timekeeping_apply_adjustment().
+ *
+ * This is required because tk_read_base::cycle_last must be advanced by
+ * timekeeper::cycle_interval so that the accumulation happens with a
+ * periodic reference.
+ *
+ * But that adjustment of xtime_nsec can make it go backward to compensate
+ * for a larger multiplier.
+ *
+ * timekeeper::coarse_offset contains the leftover cycles which were not accumulated.
+ * Therefore the nanoseconds portion of the time when the clocksource was
+ * read in timekeeping_advance() is:
+ *
+ *     nsec = (xtime_nsec + offset * mult) >> shift;
+ *
+ * Calculate that value and store it in timekeeper::coarse_nsec, from where
+ * the coarse time getters consume it.
+ */
+static inline void tk_update_coarse_nsecs(struct timekeeper *tk)
+{
+       u64 offset = (u64)tk->coarse_offset * tk->tkr_mono.mult;
+
+       tk->coarse_nsec = (tk->tkr_mono.xtime_nsec + offset) >> tk->tkr_mono.shift;
+}
+
 static void timekeeping_update_from_shadow(struct tk_data *tkd, unsigned int action)
 {
        struct timekeeper *tk = &tk_core.shadow_timekeeper;
@@ -658,6 +696,7 @@ static void timekeeping_update_from_shad
 
        tk_update_leap_state(tk);
        tk_update_ktime_data(tk);
+       tk_update_coarse_nsecs(tk);
 
        update_vsyscall(tk);
        update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
@@ -708,6 +747,12 @@ static void timekeeping_forward_now(stru
                tk_normalize_xtime(tk);
                delta -= incr;
        }
+
+       /*
+        * Clear the offset for the coarse time as the above forward
+        * brought the offset down to zero.
+        */
+       tk->coarse_offset = 0;
 }
 
 /**
@@ -804,8 +849,8 @@ EXPORT_SYMBOL_GPL(ktime_get_with_offset)
 ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
-       unsigned int seq;
        ktime_t base, *offset = offsets[offs];
+       unsigned int seq;
        u64 nsecs;
 
        WARN_ON(timekeeping_suspended);
@@ -813,7 +858,7 @@ ktime_t ktime_get_coarse_with_offset(enu
        do {
                seq = read_seqcount_begin(&tk_core.seq);
                base = ktime_add(tk->tkr_mono.base, *offset);
-               nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+               nsecs = tk->coarse_nsec;
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -1831,6 +1876,8 @@ void timekeeping_resume(void)
        /* Re-base the last cycle value */
        tks->tkr_mono.cycle_last = cycle_now;
        tks->tkr_raw.cycle_last  = cycle_now;
+       /* Reset the offset for the coarse time getters */
+       tks->coarse_offset = 0;
 
        tks->ntp_error = 0;
        timekeeping_suspended = 0;
@@ -2205,6 +2252,9 @@ static bool timekeeping_advance(enum tim
         */
        clock_set |= accumulate_nsecs_to_secs(tk);
 
+       /* Compensate the coarse time getters xtime_nsec offset */
+       tk->coarse_offset = (u32)offset;
+
        timekeeping_update_from_shadow(&tk_core, clock_set);
 
        return !!clock_set;
@@ -2248,7 +2298,7 @@ void ktime_get_coarse_real_ts64(struct t
        do {
                seq = read_seqcount_begin(&tk_core.seq);
 
-               *ts = tk_xtime(tk);
+               *ts = tk_xtime_coarse(tk);
        } while (read_seqcount_retry(&tk_core.seq, seq));
 }
 EXPORT_SYMBOL(ktime_get_coarse_real_ts64);
@@ -2271,7 +2321,7 @@ void ktime_get_coarse_real_ts64_mg(struc
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-               *ts = tk_xtime(tk);
+               *ts = tk_xtime_coarse(tk);
                offset = tk_core.timekeeper.offs_real;
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -2350,12 +2400,12 @@ void ktime_get_coarse_ts64(struct timesp
        do {
                seq = read_seqcount_begin(&tk_core.seq);
 
-               now = tk_xtime(tk);
+               now = tk_xtime_coarse(tk);
                mono = tk->wall_to_monotonic;
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
        set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec,
-                               now.tv_nsec + mono.tv_nsec);
+                                 now.tv_nsec + mono.tv_nsec);
 }
 EXPORT_SYMBOL(ktime_get_coarse_ts64);
 
--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -98,12 +98,12 @@ void update_vsyscall(struct timekeeper *
        /* CLOCK_REALTIME_COARSE */
        vdso_ts         = &vc[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE];
        vdso_ts->sec    = tk->xtime_sec;
-       vdso_ts->nsec   = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+       vdso_ts->nsec   = tk->coarse_nsec;
 
        /* CLOCK_MONOTONIC_COARSE */
        vdso_ts         = &vc[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC_COARSE];
        vdso_ts->sec    = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
-       nsec            = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+       nsec            = tk->coarse_nsec;
        nsec            = nsec + tk->wall_to_monotonic.tv_nsec;
        vdso_ts->sec    += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec);
 

Reply via email to