On Sat, Mar 15 2025 at 16:22, John Stultz wrote:
> On Sat, Mar 15, 2025 at 12:23 PM Thomas Gleixner <t...@linutronix.de> wrote:
>> > So to fix this, rework the timekeeping_advance() logic a bit
>> > so that when we are called from do_adjtimex() and the offset
>> > is smaller than cycle_interval, that we call
>> > timekeeping_forward(), to first accumulate the sub-interval
>> > time into xtime_nsec. Then with no unaccumulated cycles in
>> > offset, we can do the mult adjustment without worry of the
>> > subtraction having an impact.
>>
>> It's a smart solution. I briefly pondered something similar, but I'm not
>> really fond of the fact, that it causes a clock_was_set() event for no
>> good reason.
>>
>> clock_was_set() means that there is a time jump. But that's absolutely
>> not the case with do_adjtimex() changing the frequency for quick
>> adjustments. That does not affect continuity at all.
>
> Oh, good point.  I wasn't thinking clearly about the semantics, and
> was being a little paranoid there (as most calls to
> timekeeping_forward_now() have clock_was_set() calls that follow). I
> suspect I can do away with that bit and avoid it.  I'll respin later
> this week.

Actually your patch is not even emitting a clock_was_set() event:

> +     if (offset < real_tk->cycle_interval) {
> +             timekeeping_forward(tk, now);
> +             *clock_set = 1;
> +             return 0;
> +     }

#define TK_CLEAR_NTP            (1 << 0)
#define TK_CLOCK_WAS_SET        (1 << 1)

So it clears NTP instead. Not really what you want either. :)

But yes, it simply can forward the clock without side effects.

I think that this should be done for all TK_ADV_FREQ adjustments. In
case of such asynchronous adjustments it does not make any sense to take
the old ntp_error value into account and accumulate some more. In fact
this should simply clear ntp_error so the new value goes into effect as
provided by NTP and is not skewed by ntp_error.

These async adjustments (usually very small ones) happen when the
current source degrades and chronyd/ntpd switches over to a new server.

Something like the below.

Thanks

        tglx
---
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -682,20 +682,19 @@ static void timekeeping_update_from_shad
 }
 
 /**
- * timekeeping_forward_now - update clock to the current time
+ * timekeeping_forward - update clock to given cycle now value
  * @tk:                Pointer to the timekeeper to update
+ * @cycle_now:  Current clocksource read value
  *
  * Forward the current clock to update its state since the last call to
  * update_wall_time(). This is useful before significant clock changes,
  * as it avoids having to deal with this time offset explicitly.
  */
-static void timekeeping_forward_now(struct timekeeper *tk)
+static void timekeeping_forward(struct timekeeper *tk, u64 cycle_now)
 {
-       u64 cycle_now, delta;
+       u64 delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
+                                     tk->tkr_mono.clock->max_raw_delta);
 
-       cycle_now = tk_clock_read(&tk->tkr_mono);
-       delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask,
-                                 tk->tkr_mono.clock->max_raw_delta);
        tk->tkr_mono.cycle_last = cycle_now;
        tk->tkr_raw.cycle_last  = cycle_now;
 
@@ -711,6 +710,21 @@ static void timekeeping_forward_now(stru
 }
 
 /**
+ * timekeeping_forward_now - update clock to the current time
+ * @tk:                Pointer to the timekeeper to update
+ *
+ * Forward the current clock to update its state since the last call to
+ * update_wall_time(). This is useful before significant clock changes,
+ * as it avoids having to deal with this time offset explicitly.
+ */
+static void timekeeping_forward_now(struct timekeeper *tk)
+{
+       u64 cycle_now = tk_clock_read(&tk->tkr_mono);
+
+       timekeeping_forward(tk, cycle_now);
+}
+
+/**
  * ktime_get_real_ts64 - Returns the time of day in a timespec64.
  * @ts:                pointer to the timespec to be set
  *
@@ -2151,6 +2165,54 @@ static u64 logarithmic_accumulation(stru
        return offset;
 }
 
+static u64 timekeeping_accumulate(struct timekeeper *tk, u64 offset,
+                                 enum timekeeping_adv_mode mode,
+                                 unsigned int *clock_set)
+{
+       int shift = 0, maxshift;
+
+       /*
+        * TK_ADV_FREQ indicates that adjtimex(2) directly set the
+        * frequency or the tick length.
+        *
+        * Accumulate the offset, so that the new multiplier starts from
+        * now. This is required as otherwise for offsets, which are
+        * smaller than tk::cycle_interval, timekeeping_adjust() could set
+        * xtime_nsec backwards, which subsequently causes time going
+        * backwards in the coarse time getters. But even for the case
+        * where offset is greater than tk::cycle_interval the periodic
+        * accumulation does not have much value.
+        *
+        * Also reset tk::ntp_error as it does not make sense to keep the
+        * old accumulated error around in this case.
+        */
+       if (mode == TK_ADV_FREQ) {
+               timekeeping_forward(tk, tk->tkr_mono.cycle_last + offset);
+               tk->ntp_error = 0;
+               return 0;
+       }
+
+       /*
+        * With NO_HZ we may have to accumulate many cycle_intervals
+        * (think "ticks") worth of time at once. To do this efficiently,
+        * we calculate the largest doubling multiple of cycle_intervals
+        * that is smaller than the offset.  We then accumulate that
+        * chunk in one go, and then try to consume the next smaller
+        * doubled multiple.
+        */
+       shift = ilog2(offset) - ilog2(tk->cycle_interval);
+       shift = max(0, shift);
+       /* Bound shift to one less than what overflows tick_length */
+       maxshift = (64 - (ilog2(ntp_tick_length()) + 1)) - 1;
+       shift = min(shift, maxshift);
+       while (offset >= tk->cycle_interval) {
+               offset = logarithmic_accumulation(tk, offset, shift, clock_set);
+               if (offset < tk->cycle_interval << shift)
+                       shift--;
+       }
+       return offset;
+}
+
 /*
  * timekeeping_advance - Updates the timekeeper to the current time and
  * current NTP tick length
@@ -2160,7 +2222,6 @@ static bool timekeeping_advance(enum tim
        struct timekeeper *tk = &tk_core.shadow_timekeeper;
        struct timekeeper *real_tk = &tk_core.timekeeper;
        unsigned int clock_set = 0;
-       int shift = 0, maxshift;
        u64 offset;
 
        guard(raw_spinlock_irqsave)(&tk_core.lock);
@@ -2177,24 +2238,7 @@ static bool timekeeping_advance(enum tim
        if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
                return false;
 
-       /*
-        * With NO_HZ we may have to accumulate many cycle_intervals
-        * (think "ticks") worth of time at once. To do this efficiently,
-        * we calculate the largest doubling multiple of cycle_intervals
-        * that is smaller than the offset.  We then accumulate that
-        * chunk in one go, and then try to consume the next smaller
-        * doubled multiple.
-        */
-       shift = ilog2(offset) - ilog2(tk->cycle_interval);
-       shift = max(0, shift);
-       /* Bound shift to one less than what overflows tick_length */
-       maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
-       shift = min(shift, maxshift);
-       while (offset >= tk->cycle_interval) {
-               offset = logarithmic_accumulation(tk, offset, shift, &clock_set);
-               if (offset < tk->cycle_interval<<shift)
-                       shift--;
-       }
+       offset = timekeeping_accumulate(tk, offset, mode, &clock_set);
 
        /* Adjust the multiplier to correct NTP error */
        timekeeping_adjust(tk, offset);




Reply via email to