Linus,

please pull the latest timers-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers-urgent-for-linus

A few fixes for timekeeping and timers:

  - Plug a subtle race due to a missing READ_ONCE() in the timekeeping code
    where reloading of a pointer results in an inconsistent callback
    argument being supplied to the clocksource->read function.

  - Correct the CLOCK_MONOTONIC_RAW sub-nanosecond accounting in the
    timekeeping core code, to prevent a possible discontinuity.

  - Apply a similar fix to the arm64 vdso clock_gettime() implementation.

  - Add missing includes to clocksource drivers, which relied on indirect
    includes that fail in certain configs.

  - Use the proper iomem pointer for read/iounmap in a probe function.

Thanks,

        tglx

------------------>
Frank Rowand (1):
      clocksource/drivers/arm_arch_timer: Fix read and iounmap of incorrect variable

John Stultz (2):
      time: Fix clock->read(clock) race around clocksource changes
      time: Fix CLOCK_MONOTONIC_RAW sub-nanosecond accounting

Stephen Rothwell (1):
      clocksource: Explicitly include linux/clocksource.h when needed

Will Deacon (1):
      arm64/vdso: Fix nsec handling for CLOCK_MONOTONIC_RAW


 arch/arm64/kernel/vdso.c                |  5 ++-
 arch/arm64/kernel/vdso/gettimeofday.S   |  1 -
 drivers/clocksource/arm_arch_timer.c    |  4 +-
 drivers/clocksource/cadence_ttc_timer.c |  1 +
 drivers/clocksource/timer-sun5i.c       |  1 +
 include/linux/timekeeper_internal.h     |  5 +--
 kernel/time/timekeeping.c               | 71 +++++++++++++++++++++------------
 7 files changed, 55 insertions(+), 33 deletions(-)

diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 41b6e31f8f55..d0cb007fa482 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -221,10 +221,11 @@ void update_vsyscall(struct timekeeper *tk)
                /* tkr_mono.cycle_last == tkr_raw.cycle_last */
                vdso_data->cs_cycle_last        = tk->tkr_mono.cycle_last;
                vdso_data->raw_time_sec         = tk->raw_time.tv_sec;
-               vdso_data->raw_time_nsec        = tk->raw_time.tv_nsec;
+               vdso_data->raw_time_nsec        = (tk->raw_time.tv_nsec <<
+                                                  tk->tkr_raw.shift) +
+                                                 tk->tkr_raw.xtime_nsec;
                vdso_data->xtime_clock_sec      = tk->xtime_sec;
                vdso_data->xtime_clock_nsec     = tk->tkr_mono.xtime_nsec;
-               /* tkr_raw.xtime_nsec == 0 */
                vdso_data->cs_mono_mult         = tk->tkr_mono.mult;
                vdso_data->cs_raw_mult          = tk->tkr_raw.mult;
                /* tkr_mono.shift == tkr_raw.shift */
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S 
b/arch/arm64/kernel/vdso/gettimeofday.S
index e00b4671bd7c..76320e920965 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -256,7 +256,6 @@ monotonic_raw:
        seqcnt_check fail=monotonic_raw
 
        /* All computations are done with left-shifted nsecs. */
-       lsl     x14, x14, x12
        get_nsec_per_sec res=x9
        lsl     x9, x9, x12
 
diff --git a/drivers/clocksource/arm_arch_timer.c 
b/drivers/clocksource/arm_arch_timer.c
index 4bed671e490e..8b5c30062d99 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -1209,9 +1209,9 @@ arch_timer_mem_frame_get_cntfrq(struct 
arch_timer_mem_frame *frame)
                return 0;
        }
 
-       rate = readl_relaxed(frame + CNTFRQ);
+       rate = readl_relaxed(base + CNTFRQ);
 
-       iounmap(frame);
+       iounmap(base);
 
        return rate;
 }
diff --git a/drivers/clocksource/cadence_ttc_timer.c 
b/drivers/clocksource/cadence_ttc_timer.c
index 44e5e951583b..8e64b8460f11 100644
--- a/drivers/clocksource/cadence_ttc_timer.c
+++ b/drivers/clocksource/cadence_ttc_timer.c
@@ -18,6 +18,7 @@
 #include <linux/clk.h>
 #include <linux/interrupt.h>
 #include <linux/clockchips.h>
+#include <linux/clocksource.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/slab.h>
diff --git a/drivers/clocksource/timer-sun5i.c 
b/drivers/clocksource/timer-sun5i.c
index 2e9c830ae1cd..c4656c4d44a6 100644
--- a/drivers/clocksource/timer-sun5i.c
+++ b/drivers/clocksource/timer-sun5i.c
@@ -12,6 +12,7 @@
 
 #include <linux/clk.h>
 #include <linux/clockchips.h>
+#include <linux/clocksource.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
diff --git a/include/linux/timekeeper_internal.h 
b/include/linux/timekeeper_internal.h
index 110f4532188c..f7043ccca81c 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -29,7 +29,6 @@
  */
 struct tk_read_base {
        struct clocksource      *clock;
-       u64                     (*read)(struct clocksource *cs);
        u64                     mask;
        u64                     cycle_last;
        u32                     mult;
@@ -58,7 +57,7 @@ struct tk_read_base {
  *                     interval.
  * @xtime_remainder:   Shifted nano seconds left over when rounding
  *                     @cycle_interval
- * @raw_interval:      Raw nano seconds accumulated per NTP interval.
+ * @raw_interval:      Shifted raw nano seconds accumulated per NTP interval.
  * @ntp_error:         Difference between accumulated time and NTP time in ntp
  *                     shifted nano seconds.
  * @ntp_error_shift:   Shift conversion between clock shifted nano seconds and
@@ -100,7 +99,7 @@ struct timekeeper {
        u64                     cycle_interval;
        u64                     xtime_interval;
        s64                     xtime_remainder;
-       u32                     raw_interval;
+       u64                     raw_interval;
        /* The ntp_tick_length() value currently being used.
         * This cached copy ensures we consistently apply the tick
         * length for an entire tick, as ntp_tick_length may change
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 9652bc57fd09..b602c48cb841 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -118,6 +118,26 @@ static inline void tk_update_sleep_time(struct timekeeper 
*tk, ktime_t delta)
        tk->offs_boot = ktime_add(tk->offs_boot, delta);
 }
 
+/*
+ * tk_clock_read - atomic clocksource read() helper
+ *
+ * This helper is necessary to use in the read paths because, while the
+ * seqlock ensures we don't return a bad value while structures are updated,
+ * it doesn't protect from potential crashes. There is the possibility that
+ * the tkr's clocksource may change between the read reference, and the
+ * clock reference passed to the read function.  This can cause crashes if
+ * the wrong clocksource is passed to the wrong read function.
+ * This isn't necessary to use when holding the timekeeper_lock or doing
+ * a read of the fast-timekeeper tkrs (which is protected by its own locking
+ * and update logic).
+ */
+static inline u64 tk_clock_read(struct tk_read_base *tkr)
+{
+       struct clocksource *clock = READ_ONCE(tkr->clock);
+
+       return clock->read(clock);
+}
+
 #ifdef CONFIG_DEBUG_TIMEKEEPING
 #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
 
@@ -175,7 +195,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base 
*tkr)
         */
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-               now = tkr->read(tkr->clock);
+               now = tk_clock_read(tkr);
                last = tkr->cycle_last;
                mask = tkr->mask;
                max = tkr->clock->max_cycles;
@@ -209,7 +229,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base 
*tkr)
        u64 cycle_now, delta;
 
        /* read clocksource */
-       cycle_now = tkr->read(tkr->clock);
+       cycle_now = tk_clock_read(tkr);
 
        /* calculate the delta since the last update_wall_time */
        delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
@@ -238,12 +258,10 @@ static void tk_setup_internals(struct timekeeper *tk, 
struct clocksource *clock)
        ++tk->cs_was_changed_seq;
        old_clock = tk->tkr_mono.clock;
        tk->tkr_mono.clock = clock;
-       tk->tkr_mono.read = clock->read;
        tk->tkr_mono.mask = clock->mask;
-       tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+       tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
 
        tk->tkr_raw.clock = clock;
-       tk->tkr_raw.read = clock->read;
        tk->tkr_raw.mask = clock->mask;
        tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
 
@@ -262,7 +280,7 @@ static void tk_setup_internals(struct timekeeper *tk, 
struct clocksource *clock)
        /* Go back from cycles -> shifted ns */
        tk->xtime_interval = interval * clock->mult;
        tk->xtime_remainder = ntpinterval - tk->xtime_interval;
-       tk->raw_interval = (interval * clock->mult) >> clock->shift;
+       tk->raw_interval = interval * clock->mult;
 
         /* if changing clocks, convert xtime_nsec shift units */
        if (old_clock) {
@@ -404,7 +422,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct 
tk_fast *tkf)
 
                now += timekeeping_delta_to_ns(tkr,
                                clocksource_delta(
-                                       tkr->read(tkr->clock),
+                                       tk_clock_read(tkr),
                                        tkr->cycle_last,
                                        tkr->mask));
        } while (read_seqcount_retry(&tkf->seq, seq));
@@ -461,6 +479,10 @@ static u64 dummy_clock_read(struct clocksource *cs)
        return cycles_at_suspend;
 }
 
+static struct clocksource dummy_clock = {
+       .read = dummy_clock_read,
+};
+
 /**
  * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
  * @tk: Timekeeper to snapshot.
@@ -477,13 +499,13 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
        struct tk_read_base *tkr = &tk->tkr_mono;
 
        memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-       cycles_at_suspend = tkr->read(tkr->clock);
-       tkr_dummy.read = dummy_clock_read;
+       cycles_at_suspend = tk_clock_read(tkr);
+       tkr_dummy.clock = &dummy_clock;
        update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
 
        tkr = &tk->tkr_raw;
        memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-       tkr_dummy.read = dummy_clock_read;
+       tkr_dummy.clock = &dummy_clock;
        update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
 }
 
@@ -649,11 +671,10 @@ static void timekeeping_update(struct timekeeper *tk, 
unsigned int action)
  */
 static void timekeeping_forward_now(struct timekeeper *tk)
 {
-       struct clocksource *clock = tk->tkr_mono.clock;
        u64 cycle_now, delta;
        u64 nsec;
 
-       cycle_now = tk->tkr_mono.read(clock);
+       cycle_now = tk_clock_read(&tk->tkr_mono);
        delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, 
tk->tkr_mono.mask);
        tk->tkr_mono.cycle_last = cycle_now;
        tk->tkr_raw.cycle_last  = cycle_now;
@@ -929,8 +950,7 @@ void ktime_get_snapshot(struct system_time_snapshot 
*systime_snapshot)
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-
-               now = tk->tkr_mono.read(tk->tkr_mono.clock);
+               now = tk_clock_read(&tk->tkr_mono);
                systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
                systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
                base_real = ktime_add(tk->tkr_mono.base,
@@ -1108,7 +1128,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
                 * Check whether the system counter value provided by the
                 * device driver is on the current timekeeping interval.
                 */
-               now = tk->tkr_mono.read(tk->tkr_mono.clock);
+               now = tk_clock_read(&tk->tkr_mono);
                interval_start = tk->tkr_mono.cycle_last;
                if (!cycle_between(interval_start, cycles, now)) {
                        clock_was_set_seq = tk->clock_was_set_seq;
@@ -1629,7 +1649,7 @@ void timekeeping_resume(void)
         * The less preferred source will only be tried if there is no better
         * usable source. The rtc part is handled separately in rtc core code.
         */
-       cycle_now = tk->tkr_mono.read(clock);
+       cycle_now = tk_clock_read(&tk->tkr_mono);
        if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
                cycle_now > tk->tkr_mono.cycle_last) {
                u64 nsec, cyc_delta;
@@ -1976,7 +1996,7 @@ static u64 logarithmic_accumulation(struct timekeeper 
*tk, u64 offset,
                                    u32 shift, unsigned int *clock_set)
 {
        u64 interval = tk->cycle_interval << shift;
-       u64 raw_nsecs;
+       u64 snsec_per_sec;
 
        /* If the offset is smaller than a shifted interval, do nothing */
        if (offset < interval)
@@ -1991,14 +2011,15 @@ static u64 logarithmic_accumulation(struct timekeeper 
*tk, u64 offset,
        *clock_set |= accumulate_nsecs_to_secs(tk);
 
        /* Accumulate raw time */
-       raw_nsecs = (u64)tk->raw_interval << shift;
-       raw_nsecs += tk->raw_time.tv_nsec;
-       if (raw_nsecs >= NSEC_PER_SEC) {
-               u64 raw_secs = raw_nsecs;
-               raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
-               tk->raw_time.tv_sec += raw_secs;
+       tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec << 
tk->tkr_raw.shift;
+       tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
+       snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+       while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
+               tk->tkr_raw.xtime_nsec -= snsec_per_sec;
+               tk->raw_time.tv_sec++;
        }
-       tk->raw_time.tv_nsec = raw_nsecs;
+       tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
+       tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec << 
tk->tkr_raw.shift;
 
        /* Accumulate error between NTP and clock interval */
        tk->ntp_error += tk->ntp_tick << shift;
@@ -2030,7 +2051,7 @@ void update_wall_time(void)
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
        offset = real_tk->cycle_interval;
 #else
-       offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+       offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
                                   tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 #endif
 

Reply via email to