From: David Woodhouse <[email protected]>

Add a read_snapshot() callback to struct clocksource which returns the
derived clocksource value while also providing the underlying hardware
counter reading and the related clocksource ID.

This allows ktime_get_snapshot_id() to populate new hw_cycles and hw_csid
fields in struct system_time_snapshot.

For clocksources that are derived from an underlying counter (e.g., Hyper-V
TSC page scales TSC to 10MHz, kvmclock scales TSC to 1GHz), this provides
atomic access to both the derived value needed for timekeeping
calculations, and the raw hardware counter needed by consumers like KVM's
master clock and the vmclock PTP driver.

[ tglx: Reworked it slightly ]

Signed-off-by: David Woodhouse <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Assisted-by: Kiro:claude-opus-4.6-1m
Link: https://patch.msgid.link/[email protected]
---
 include/linux/clocksource.h |   24 ++++++++++++++++++++++++
 include/linux/timekeeping.h |    6 ++++++
 kernel/time/timekeeping.c   |   21 ++++++++++++++++++++-
 3 files changed, 50 insertions(+), 1 deletion(-)
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -32,6 +32,21 @@ struct module;
 #include <vdso/clocksource.h>
 
 /**
+ * struct clocksource_hw_snapshot - Snapshot for the underlying hardware counter of derived
+ *                                 clocksources like kvmclock or Hyper-V scaled TSC
+ * @hw_cycles:         The hardware counter value
+ * @hw_csid:           Clocksource ID of the hardware counter
+ *
+ * Such clocksources must implement the read_snapshot() callback and fill in the
+ * hardware counter value, the clocksource ID of the hardware counter and derive
+ * the actual clocksource cycles from @hw_cycles to provide an atomic snapshot.
+ */
+struct clocksource_hw_snapshot {
+       u64                     hw_cycles;
+       enum clocksource_ids    hw_csid;
+};
+
+/**
  * struct clocksource - hardware abstraction for a free running counter
  *     Provides mostly state-free accessors to the underlying hardware.
  *     This is the structure used for system time.
@@ -72,6 +87,14 @@ struct module;
  * @flags:             Flags describing special properties
  * @base:              Hardware abstraction for clock on which a clocksource
  *                     is based
+ * @read_snapshot:     Extended @read() function for clocksources such as
+ *                     kvmclock or the Hyper-V scaled TSC where the actual
+ *                     clocksource value for timekeeping is calculated from an
+ *                     underlying hardware counter. Returns the timekeeping
+ *                     relevant cycle value and stores the raw value of the
+ *                     underlying counter from which it was calculated
+ *                     including the clocksource ID of that counter in the
+ *                     clocksource hardware snapshot.
  * @enable:            Optional function to enable the clocksource
  * @disable:           Optional function to disable the clocksource
  * @suspend:           Optional suspend function for the clocksource
@@ -113,6 +136,7 @@ struct clocksource {
        unsigned long           flags;
        struct clocksource_base *base;
 
+       u64                     (*read_snapshot)(struct clocksource *cs, struct clocksource_hw_snapshot *chs);
        int                     (*enable)(struct clocksource *cs);
        void                    (*disable)(struct clocksource *cs);
        void                    (*suspend)(struct clocksource *cs);
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -279,18 +279,24 @@ static inline bool ktime_get_aux_ts64(cl
  * struct system_time_snapshot - Simultaneous time capture of CLOCK_MONOTONIC_RAW,
  *                              a selected CLOCK_* and the clocksource counter value
  * @cycles:            Clocksource counter value to produce the system times
+ * @hw_cycles:         For derived clocksources, the hardware counter value from
+ *                     which @cycles was derived
  * @systime:           The system time of the selected CLOCK ID
  * @monoraw:           Monotonic raw system time
  * @cs_id:             Clocksource ID
+ * @hw_csid:           Clocksource ID of the underlying hardware counter for derived
+ *                     clocksources which implement the read_snapshot() callback.
  * @clock_was_set_seq: The sequence number of clock-was-set events
  * @cs_was_changed_seq:        The sequence number of clocksource change events
  * @valid:             True if the snapshot is valid
  */
 struct system_time_snapshot {
        u64                     cycles;
+       u64                     hw_cycles;
        ktime_t                 systime;
        ktime_t                 monoraw;
        enum clocksource_ids    cs_id;
+       enum clocksource_ids    hw_csid;
        unsigned int            clock_was_set_seq;
        u8                      cs_was_changed_seq;
        u8                      valid;
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -320,6 +320,7 @@ static __always_inline u64 tk_clock_read
 
        return clock->read(clock);
 }
+
 static inline void clocksource_disable_inline_read(void) { }
 static inline void clocksource_enable_inline_read(void) { }
 #endif
@@ -1187,6 +1188,18 @@ noinstr time64_t __ktime_get_real_second
        return tk->xtime_sec;
 }
 
+static inline u64 tk_clock_read_snapshot(const struct tk_read_base *tkr,
+                                        struct clocksource_hw_snapshot *chs)
+{
+       struct clocksource *clock = READ_ONCE(tkr->clock);
+
+       if (unlikely(clock->read_snapshot))     /* Derived clocksource: callback gets @chs to fill */
+               return clock->read_snapshot(clock, chs);
+
+       return clock->read(clock);      /* Plain read: @chs is not written on this path */
+}
+
+
 /**
  * ktime_get_snapshot_id -  Simultaneously snapshot a given clock ID with
  *                         CLOCK_MONOTONIC_RAW and the underlying
@@ -1237,14 +1250,20 @@ void ktime_get_snapshot_id(clockid_t clo
        tk = &tkd->timekeeper;
 
        do {
+               struct clocksource_hw_snapshot chs = { };
+
                seq = read_seqcount_begin(&tkd->seq);
 
                /* Aux clocks can be invalid */
                if (!tk->clock_valid)
                        return;
 
-               now = tk_clock_read(&tk->tkr_mono);
+               now = tk_clock_read_snapshot(&tk->tkr_mono, &chs);
                systime_snapshot->cs_id = tk->tkr_mono.clock->id;
+
+               systime_snapshot->hw_cycles = chs.hw_cycles;
+               systime_snapshot->hw_csid = chs.hw_csid;
+
                systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
                systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
 


Reply via email to