In some testing on arm64 platforms, I was seeing null ptr
crashes in the kselftest/timers clocksource-switch test.

This was happening in a read function like:
u64 clocksource_mmio_readl_down(struct clocksource *c)
{
    return ~(u64)readl_relaxed(to_mmio_clksrc(c)->reg) & c->mask;
}

Where the callers enter the seqlock, and then call something
like:
    cycle_now = tkr->read(tkr->clock);

The problem seeming to be that since the ->read() and ->clock
pointer references are happening separately, its possible the
clocksource change happens in between and we end up calling the
old ->read() function with the new clocksource, (or vice-versa)
which causes the to_mmio_clksrc() in the read function to run
off into space.

This patch tries to address the issue by providing a helper
function that atomically reads the clock value and then calls
the clock->read(clock) function so that we always call the read
funciton with the appropriate clocksource and don't accidentally
mix them.

The one exception where this helper isn't necessary is for the
fast-timekepers which use their own locking and update logic
to the tkr structures.

Cc: Thomas Gleixner <t...@linutronix.de>
Cc: Ingo Molnar <mi...@kernel.org>
Cc: Miroslav Lichvar <mlich...@redhat.com>
Cc: Richard Cochran <richardcoch...@gmail.com>
Cc: Prarit Bhargava <pra...@redhat.com>
Cc: Stephen Boyd <stephen.b...@linaro.org>
Cc: Daniel Mentz <danielme...@google.com>
Cc: stable <sta...@vger.kernel.org>
Acked-by: Ingo Molnar <mi...@kernel.org>
Signed-off-by: John Stultz <john.stu...@linaro.org>
---
v2: Addressed Ingo's feedback on wording
---
 kernel/time/timekeeping.c | 40 +++++++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 9652bc5..797c73e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -118,6 +118,26 @@ static inline void tk_update_sleep_time(struct timekeeper 
*tk, ktime_t delta)
        tk->offs_boot = ktime_add(tk->offs_boot, delta);
 }
 
+/*
+ * tk_clock_read - atomic clocksource read() helper
+ *
+ * This helper is necessary to use in the read paths because, while the
+ * seqlock ensures we don't return a bad value while structures are updated,
+ * it doesn't protect from potential crashes. There is the possibility that
+ * the tkr's clocksource may change between the read reference, and the
+ * clock reference passed to the read function.  This can cause crashes if
+ * the wrong clocksource is passed to the wrong read function.
+ * This isn't necessary to use when holding the timekeeper_lock or doing
+ * a read of the fast-timekeeper tkrs (which is protected by its own locking
+ * and update logic).
+ */
+static inline u64 tk_clock_read(struct tk_read_base *tkr)
+{
+       struct clocksource *clock = READ_ONCE(tkr->clock);
+
+       return clock->read(clock);
+}
+
 #ifdef CONFIG_DEBUG_TIMEKEEPING
 #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
 
@@ -175,7 +195,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base 
*tkr)
         */
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-               now = tkr->read(tkr->clock);
+               now = tk_clock_read(tkr);
                last = tkr->cycle_last;
                mask = tkr->mask;
                max = tkr->clock->max_cycles;
@@ -209,7 +229,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base 
*tkr)
        u64 cycle_now, delta;
 
        /* read clocksource */
-       cycle_now = tkr->read(tkr->clock);
+       cycle_now = tk_clock_read(tkr);
 
        /* calculate the delta since the last update_wall_time */
        delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
@@ -240,7 +260,7 @@ static void tk_setup_internals(struct timekeeper *tk, 
struct clocksource *clock)
        tk->tkr_mono.clock = clock;
        tk->tkr_mono.read = clock->read;
        tk->tkr_mono.mask = clock->mask;
-       tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+       tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
 
        tk->tkr_raw.clock = clock;
        tk->tkr_raw.read = clock->read;
@@ -477,7 +497,7 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
        struct tk_read_base *tkr = &tk->tkr_mono;
 
        memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-       cycles_at_suspend = tkr->read(tkr->clock);
+       cycles_at_suspend = tk_clock_read(tkr);
        tkr_dummy.read = dummy_clock_read;
        update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
 
@@ -649,11 +669,10 @@ static void timekeeping_update(struct timekeeper *tk, 
unsigned int action)
  */
 static void timekeeping_forward_now(struct timekeeper *tk)
 {
-       struct clocksource *clock = tk->tkr_mono.clock;
        u64 cycle_now, delta;
        u64 nsec;
 
-       cycle_now = tk->tkr_mono.read(clock);
+       cycle_now = tk_clock_read(&tk->tkr_mono);
        delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, 
tk->tkr_mono.mask);
        tk->tkr_mono.cycle_last = cycle_now;
        tk->tkr_raw.cycle_last  = cycle_now;
@@ -929,8 +948,7 @@ void ktime_get_snapshot(struct system_time_snapshot 
*systime_snapshot)
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-
-               now = tk->tkr_mono.read(tk->tkr_mono.clock);
+               now = tk_clock_read(&tk->tkr_mono);
                systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
                systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
                base_real = ktime_add(tk->tkr_mono.base,
@@ -1108,7 +1126,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
                 * Check whether the system counter value provided by the
                 * device driver is on the current timekeeping interval.
                 */
-               now = tk->tkr_mono.read(tk->tkr_mono.clock);
+               now = tk_clock_read(&tk->tkr_mono);
                interval_start = tk->tkr_mono.cycle_last;
                if (!cycle_between(interval_start, cycles, now)) {
                        clock_was_set_seq = tk->clock_was_set_seq;
@@ -1629,7 +1647,7 @@ void timekeeping_resume(void)
         * The less preferred source will only be tried if there is no better
         * usable source. The rtc part is handled separately in rtc core code.
         */
-       cycle_now = tk->tkr_mono.read(clock);
+       cycle_now = tk_clock_read(&tk->tkr_mono);
        if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
                cycle_now > tk->tkr_mono.cycle_last) {
                u64 nsec, cyc_delta;
@@ -2030,7 +2048,7 @@ void update_wall_time(void)
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
        offset = real_tk->cycle_interval;
 #else
-       offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+       offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
                                   tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 #endif
 
-- 
2.7.4

Reply via email to