On Thu, Aug 31, 2017 at 12:23:42PM -0000, Anna-Maria Gleixner wrote:
> @@ -540,12 +539,23 @@ static ktime_t __hrtimer_get_next_event(
>  
>       hrtimer_update_next_timer(cpu_base, NULL);
>  
> +     if (!cpu_base->softirq_activated) {
> +             active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
> +             expires_next = __hrtimer_next_event_base(cpu_base, active,
> +                                                      expires_next);
> +             cpu_base->softirq_expires_next = expires_next;
> +     }
> +

So this bugged the living daylights out of me. That's a very asymmetric
thing to do. The normal rule is that get_next_event() updates
cpu_base::next_timer and hrtimer_reprogram() updates
cpu_base::expires_next. And here you make a giant mess of things.
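
To illustrate the intended split in miniature, a toy userspace sketch
(simplified, not the kernel code; names and types here are made up):

        struct timer { long long expires; };

        struct cpu_base {
                struct timer    *next_timer;    /* written by the scan side */
                long long       expires_next;   /* written by the commit side */
        };

        /* scan: work out which timer fires first; commits nothing */
        static struct timer *get_next_event(struct cpu_base *cb,
                                            struct timer *t, int n)
        {
                int i;

                cb->next_timer = NULL;
                for (i = 0; i < n; i++) {
                        if (!cb->next_timer ||
                            t[i].expires < cb->next_timer->expires)
                                cb->next_timer = &t[i];
                }
                return cb->next_timer;
        }

        /* commit: the only writer of expires_next */
        static void reprogram(struct cpu_base *cb, struct timer *timer)
        {
                cb->expires_next = timer->expires;
        }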

The below is a fairly large diff on top of this patch which is
_completely_ untested, but should hopefully restore sanity.

 - fixes the mixing of bool and bitfields in cpu_base (see the layout
   sketch below this list)
 - restores the whole get_next_timer() / reprogram() symmetry
   by adding cpu_base::softirq_next_timer.
 - adds a comment hopefully explaining the why of that
 - adds a for_each_active_base() helper; we had two copies of that, and
   for a brief moment I had another few, luckily those didn't survive
   :-)
 - uses irqsave/irqrestore for the cpu_base->lock fiddling around
   __run_hrtimer().
 - folded hrtimer_reprogram_softirq() into hrtimer_reprogram()
 - removed the superfluous local_bh_disable(); local_irq_disable()
   already prevents softirqs from running.
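
On the first item: the three bools occupy whole bytes and the following
bitfield starts a fresh int-sized storage unit, so the six flags span
two units instead of one. A minimal layout sketch (userspace C, sizes
assuming a typical x86_64 ABI):

        #include <stdio.h>

        /* bools take whole bytes; the bitfield opens a new unit */
        struct mixed {
                _Bool           migration_enabled;
                _Bool           nohz_active;
                _Bool           softirq_activated;
                unsigned int    hres_active     : 1,
                                in_hrtirq       : 1,
                                hang_detected   : 1;
        };

        /* all six one-bit fields pack into a single unsigned int */
        struct uniform {
                unsigned int    migration_enabled : 1;
                unsigned int    nohz_active       : 1;
                unsigned int    softirq_activated : 1;
                unsigned int    hres_active       : 1;
                unsigned int    in_hrtirq         : 1;
                unsigned int    hang_detected     : 1;
        };

        int main(void)
        {
                /* typically prints "8 vs 4" */
                printf("%zu vs %zu\n", sizeof(struct mixed),
                       sizeof(struct uniform));
                return 0;
        }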

Please consider...

---
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -186,21 +186,26 @@ struct hrtimer_cpu_base {
        unsigned int                    cpu;
        unsigned int                    active_bases;
        unsigned int                    clock_was_set_seq;
-       bool                            migration_enabled;
-       bool                            nohz_active;
-       bool                            softirq_activated;
-       unsigned int                    hres_active     : 1,
-                                       in_hrtirq       : 1,
-                                       hang_detected   : 1;
+
+       unsigned int                    migration_enabled : 1;
+       unsigned int                    nohz_active       : 1;
+       unsigned int                    softirq_activated : 1;
+       unsigned int                    hres_active       : 1;
+       unsigned int                    in_hrtirq         : 1;
+       unsigned int                    hang_detected     : 1;
+
 #ifdef CONFIG_HIGH_RES_TIMERS
        unsigned int                    nr_events;
        unsigned int                    nr_retries;
        unsigned int                    nr_hangs;
        unsigned int                    max_hang_time;
 #endif
+
        ktime_t                         expires_next;
        struct hrtimer                  *next_timer;
        ktime_t                         softirq_expires_next;
+       struct hrtimer                  *softirq_next_timer;
+
        struct hrtimer_clock_base       clock_base[HRTIMER_MAX_CLOCK_BASES];
 } ____cacheline_aligned;
 
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -78,6 +78,7 @@
 #define MASK_SHIFT             (HRTIMER_BASE_MONOTONIC_SOFT)
 #define HRTIMER_ACTIVE_HARD    ((1U << MASK_SHIFT) - 1)
 #define HRTIMER_ACTIVE_SOFT    (HRTIMER_ACTIVE_HARD << MASK_SHIFT)
+#define HRTIMER_ACTIVE         (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)
 
 /*
  * The timer bases:
@@ -493,33 +494,49 @@ static inline void debug_deactivate(stru
        trace_hrtimer_cancel(timer);
 }
 
-static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base,
-                                            struct hrtimer *timer)
+static inline bool hrtimer_is_softirq(struct hrtimer *timer)
 {
-       cpu_base->next_timer = timer;
+       return timer->base->index >= HRTIMER_BASE_MONOTONIC_SOFT;
+}
+
+static struct hrtimer_clock_base *
+__next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
+{
+       struct hrtimer_clock_base *base = NULL;
+
+       if (*active) {
+               unsigned int idx = __ffs(*active);
+               *active &= ~(1U << idx);
+               base = &cpu_base->clock_base[idx];
+       }
+
+       return base;
 }
 
+#define for_each_active_base(base, cpu_base, active)   \
+       while ((base = __next_base((cpu_base), &(active))))
+
 static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
                                         unsigned int active,
                                         ktime_t expires_next)
 {
+       struct hrtimer_clock_base *base;
        ktime_t expires;
 
-       while (active) {
-               unsigned int id = __ffs(active);
-               struct hrtimer_clock_base *base;
+       for_each_active_base(base, cpu_base, active) {
                struct timerqueue_node *next;
                struct hrtimer *timer;
 
-               active &= ~(1U << id);
-               base = cpu_base->clock_base + id;
-
                next = timerqueue_getnext(&base->active);
                timer = container_of(next, struct hrtimer, node);
                expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
                if (expires < expires_next) {
                        expires_next = expires;
-                       hrtimer_update_next_timer(cpu_base, timer);
+                       if (hrtimer_is_softirq(timer))
+                               cpu_base->softirq_next_timer = timer;
+                       else
+                               cpu_base->next_timer = timer;
                }
        }
        /*
@@ -529,30 +546,47 @@ static ktime_t __hrtimer_next_event_base
         */
        if (expires_next < 0)
                expires_next = 0;
+
        return expires_next;
 }
 
-static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base)
+/*
+ * Recomputes cpu_base::*next_timer and returns the earliest expires_next but
+ * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram.
+ *
+ * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases,
+ * those timers will get run whenever the softirq gets handled, at the end of
+ * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases.
+ *
+ * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases.
+ * The !softirq values are the minima across HRTIMER_ACTIVE, unless an actual
+ * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD.
+ *
+ * @active_mask must be one of:
+ *  - HRTIMER_ACTIVE,
+ *  - HRTIMER_ACTIVE_SOFT, or
+ *  - HRTIMER_ACTIVE_HARD.
+ */
+static ktime_t
+__hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask)
 {
        unsigned int active;
+       struct hrtimer *next_timer = NULL;
        ktime_t expires_next = KTIME_MAX;
 
-       hrtimer_update_next_timer(cpu_base, NULL);
-
-       if (!cpu_base->softirq_activated) {
+       if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
                active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
-               expires_next = __hrtimer_next_event_base(cpu_base, active,
-                                                        expires_next);
-               cpu_base->softirq_expires_next = expires_next;
-       }
+               cpu_base->softirq_next_timer = next_timer;
+               expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
 
-       active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
-       expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+               next_timer = cpu_base->softirq_next_timer;
+       }
 
-       /*
-        * cpu_base->expires_next is not updated here. It is set only
-        * in hrtimer_reprogramming path!
-        */
+       if (active_mask & HRTIMER_ACTIVE_HARD) {
+               active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
+               cpu_base->next_timer = next_timer;
+               expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+       }
 
        return expires_next;
 }
@@ -621,7 +655,10 @@ hrtimer_force_reprogram(struct hrtimer_c
        if (!cpu_base->hres_active)
                return;
 
-       expires_next = __hrtimer_get_next_event(cpu_base);
+       /*
+        * Find the current next expiration time.
+        */
+       expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE);
 
        if (skip_equal && expires_next == cpu_base->expires_next)
                return;
@@ -727,6 +764,20 @@ static void hrtimer_reprogram(struct hrt
 
        WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
 
+       if (hrtimer_is_softirq(timer)) {
+               if (cpu_base->softirq_activated)
+                       return;
+
+               if (!ktime_before(expires, cpu_base->softirq_expires_next))
+                       return;
+
+               cpu_base->softirq_next_timer = timer;
+               cpu_base->softirq_expires_next = expires;
+
+               if (!ktime_before(expires, cpu_base->expires_next))
+                       return;
+       }
+
        /*
         * If the timer is not on the current cpu, we cannot reprogram
         * the other cpus clock event device.
@@ -755,7 +806,7 @@ static void hrtimer_reprogram(struct hrt
                return;
 
        /* Update the pointer to the next expiring timer */
-       hrtimer_update_next_timer(cpu_base, timer);
+       cpu_base->next_timer = timer;
        cpu_base->expires_next = expires;
 
        /*
@@ -979,47 +1030,24 @@ static inline ktime_t hrtimer_update_low
        return tim;
 }
 
-static void hrtimer_reprogram_softirq(struct hrtimer *timer)
+static void
+hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram)
 {
-       struct hrtimer_clock_base *base = timer->base;
-       struct hrtimer_cpu_base *cpu_base = base->cpu_base;
        ktime_t expires;
 
        /*
-        * The softirq timer is not rearmed, when the softirq was raised
-        * and has not yet run to completion.
+        * Find the next SOFT expiration.
         */
-       if (cpu_base->softirq_activated)
-               return;
-
-       expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
-
-       if (!ktime_before(expires, cpu_base->softirq_expires_next))
-               return;
-
-       cpu_base->softirq_expires_next = expires;
-
-       if (!ktime_before(expires, cpu_base->expires_next))
-               return;
-       hrtimer_reprogram(timer);
-}
-
-static void hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base,
-                                        bool reprogram)
-{
-       ktime_t expires;
-
-       expires = __hrtimer_get_next_event(cpu_base);
+       expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
 
        if (!reprogram || !ktime_before(expires, cpu_base->expires_next))
                return;
+
        /*
-        * next_timer can be used here, because
-        * hrtimer_get_next_event() updated the next
-        * timer. expires_next is only set when reprogramming function
-        * is called.
+        * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event()
+        * cpu_base->*expires_next is only set by hrtimer_reprogram()
         */
-       hrtimer_reprogram(cpu_base->next_timer);
+       hrtimer_reprogram(cpu_base->softirq_next_timer);
 }
 
 static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
@@ -1060,12 +1088,9 @@ void hrtimer_start_range_ns(struct hrtim
 
        base = lock_hrtimer_base(timer, &flags);
 
-       if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) {
-               if (timer->base->index < HRTIMER_BASE_MONOTONIC_SOFT)
-                       hrtimer_reprogram(timer);
-               else
-                       hrtimer_reprogram_softirq(timer);
-       }
+       if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base))
+               hrtimer_reprogram(timer);
+
        unlock_hrtimer_base(timer, &flags);
 }
 EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
@@ -1163,7 +1188,7 @@ u64 hrtimer_get_next_event(void)
        raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
        if (!__hrtimer_hres_active(cpu_base))
-               expires = __hrtimer_get_next_event(cpu_base);
+               expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE);
 
        raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
@@ -1263,7 +1288,7 @@ EXPORT_SYMBOL_GPL(hrtimer_active);
 static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
                          struct hrtimer_clock_base *base,
                          struct hrtimer *timer, ktime_t *now,
-                         bool hardirq)
+                         unsigned long flags)
 {
        enum hrtimer_restart (*fn)(struct hrtimer *);
        int restart;
@@ -1298,19 +1323,13 @@ static void __run_hrtimer(struct hrtimer
         * protected against migration to a different CPU even if the lock
         * is dropped.
         */
-       if (hardirq)
-               raw_spin_unlock(&cpu_base->lock);
-       else
-               raw_spin_unlock_irq(&cpu_base->lock);
+       raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
        trace_hrtimer_expire_entry(timer, now);
        restart = fn(timer);
        trace_hrtimer_expire_exit(timer);
 
-       if (hardirq)
-               raw_spin_lock(&cpu_base->lock);
-       else
-               raw_spin_lock_irq(&cpu_base->lock);
+       raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
        /*
         * Note: We clear the running state after enqueue_hrtimer and
@@ -1339,19 +1358,15 @@ static void __run_hrtimer(struct hrtimer
 }
 
 static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
-                                unsigned int active_mask)
+                                unsigned int active_mask, unsigned long flags)
 {
        unsigned int active = cpu_base->active_bases & active_mask;
+       struct hrtimer_clock_base *base;
 
-       while (active) {
-               unsigned int id = __ffs(active);
-               struct hrtimer_clock_base *base;
+       for_each_active_base(base, cpu_base, active) {
                struct timerqueue_node *node;
                ktime_t basenow;
 
-               active &= ~(1U << id);
-               base = cpu_base->clock_base + id;
-
                basenow = ktime_add(now, base->offset);
 
                while ((node = timerqueue_getnext(&base->active))) {
@@ -1374,8 +1389,7 @@ static void __hrtimer_run_queues(struct
                        if (basenow < hrtimer_get_softexpires_tv64(timer))
                                break;
 
-                       __run_hrtimer(cpu_base, base, timer, &basenow,
-                                     active_mask == HRTIMER_ACTIVE_HARD);
+                       __run_hrtimer(cpu_base, base, timer, &basenow, flags);
                }
        }
 }
@@ -1383,17 +1397,18 @@ static void __hrtimer_run_queues(struct
 static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 {
        struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+       unsigned long flags;
        ktime_t now;
 
-       raw_spin_lock_irq(&cpu_base->lock);
+       raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
        now = hrtimer_update_base(cpu_base);
-       __hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_SOFT);
+       __hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_SOFT, flags);
 
        cpu_base->softirq_activated = 0;
        hrtimer_update_softirq_timer(cpu_base, true);
 
-       raw_spin_unlock_irq(&cpu_base->lock);
+       raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1406,13 +1421,14 @@ void hrtimer_interrupt(struct clock_even
 {
        struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
        ktime_t expires_next, now, entry_time, delta;
+       unsigned long flags;
        int retries = 0;
 
        BUG_ON(!cpu_base->hres_active);
        cpu_base->nr_events++;
        dev->next_event = KTIME_MAX;
 
-       raw_spin_lock(&cpu_base->lock);
+       raw_spin_lock_irqsave(&cpu_base->lock, flags);
        entry_time = now = hrtimer_update_base(cpu_base);
 retry:
        cpu_base->in_hrtirq = 1;
@@ -1431,17 +1447,17 @@ void hrtimer_interrupt(struct clock_even
                raise_softirq_irqoff(HRTIMER_SOFTIRQ);
        }
 
-       __hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_HARD);
+       __hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_HARD, flags);
 
-       /* Reevaluate the hard interrupt clock bases for the next expiry */
-       expires_next = __hrtimer_get_next_event(cpu_base);
+       /* Reevaluate the clock bases for the next expiry */
+       expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE);
        /*
         * Store the new expiry value so the migration code can verify
         * against it.
         */
        cpu_base->expires_next = expires_next;
        cpu_base->in_hrtirq = 0;
-       raw_spin_unlock(&cpu_base->lock);
+       raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 
        /* Reprogramming necessary ? */
        if (!tick_program_event(expires_next, 0)) {
@@ -1462,7 +1478,7 @@ void hrtimer_interrupt(struct clock_even
         * Acquire base lock for updating the offsets and retrieving
         * the current time.
         */
-       raw_spin_lock(&cpu_base->lock);
+       raw_spin_lock_irqsave(&cpu_base->lock, flags);
        now = hrtimer_update_base(cpu_base);
        cpu_base->nr_retries++;
        if (++retries < 3)
@@ -1475,7 +1491,8 @@ void hrtimer_interrupt(struct clock_even
         */
        cpu_base->nr_hangs++;
        cpu_base->hang_detected = 1;
-       raw_spin_unlock(&cpu_base->lock);
+       raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+
        delta = ktime_sub(now, entry_time);
        if ((unsigned int)delta > cpu_base->max_hang_time)
                cpu_base->max_hang_time = (unsigned int) delta;
@@ -1517,6 +1534,7 @@ static inline void __hrtimer_peek_ahead_
 void hrtimer_run_queues(void)
 {
        struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+       unsigned long flags;
        ktime_t now;
 
        if (__hrtimer_hres_active(cpu_base))
@@ -1534,7 +1552,7 @@ void hrtimer_run_queues(void)
                return;
        }
 
-       raw_spin_lock(&cpu_base->lock);
+       raw_spin_lock_irqsave(&cpu_base->lock, flags);
        now = hrtimer_update_base(cpu_base);
 
        if (!ktime_before(now, cpu_base->softirq_expires_next)) {
@@ -1543,8 +1561,8 @@ void hrtimer_run_queues(void)
                raise_softirq_irqoff(HRTIMER_SOFTIRQ);
        }
 
-       __hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_HARD);
-       raw_spin_unlock(&cpu_base->lock);
+       __hrtimer_run_queues(cpu_base, now, HRTIMER_ACTIVE_HARD, flags);
+       raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 }
 
 /*
@@ -1768,7 +1786,6 @@ int hrtimers_dead_cpu(unsigned int scpu)
        BUG_ON(cpu_online(scpu));
        tick_cancel_sched_timer(scpu);
 
-       local_bh_disable();
        local_irq_disable();
        old_base = &per_cpu(hrtimer_bases, scpu);
        new_base = this_cpu_ptr(&hrtimer_bases);
@@ -1796,7 +1813,6 @@ int hrtimers_dead_cpu(unsigned int scpu)
        /* Check, if we got expired work to do */
        __hrtimer_peek_ahead_timers();
        local_irq_enable();
-       local_bh_enable();
        return 0;
 }
 
