On Fri, 2013-03-22 at 01:11 +0000, Ben Hutchings wrote:
> Commit b22affe0aef4 'hrtimer: Prevent hrtimer_enqueue_reprogram race'
> conflicts with the RT patches
> hrtimer-fixup-hrtimer-callback-changes-for-preempt-r.patch and
> peter_zijlstra-frob-hrtimer.patch, as they all change
> hrtimer_enqueue_reprogram().  It seems that the changes in the RT
> patches now belong in __hrtimer_start_range_ns().
> 
> Since I haven't seen any RT releases in a while, here's what I came up
> with for 3.2-rt:

Note, I posted a fix on Tuesday:

https://lkml.org/lkml/2013/3/19/369

I'm waiting for Thomas to give his OK on it before releasing the series.
He told me he'll have a look at it tomorrow. I've already run the series
through all my tests, and will post it immediately after I get the OK.
Or if there's an issue I will have to fix it and rerun my tests.


> 
> ---
> From: Thomas Gleixner <t...@linutronix.de>
> Date: Fri, 3 Jul 2009 08:44:31 -0500
> Subject: hrtimer: fixup hrtimer callback changes for preempt-rt
> 
> In preempt-rt we can not call the callbacks which take sleeping locks
> from the timer interrupt context.
> 
> Bring back the softirq split for now, until we fixed the signal
> delivery problem for real.
> 
> Signed-off-by: Thomas Gleixner <t...@linutronix.de>
> Signed-off-by: Ingo Molnar <mi...@elte.hu>
> [bwh: Pull the changes to hrtimer_enqueue_reprogram() up into
>  __hrtimer_start_range_ns(), following changes in
>  commit b22affe0aef4 'hrtimer: Prevent hrtimer_enqueue_reprogram race'
>  backported into 3.2.40]
> Signed-off-by: Ben Hutchings <b...@decadent.org.uk>
> ---
> --- a/include/linux/hrtimer.h
> +++ b/include/linux/hrtimer.h
> @@ -111,6 +111,8 @@ struct hrtimer {
>       enum hrtimer_restart            (*function)(struct hrtimer *);
>       struct hrtimer_clock_base       *base;
>       unsigned long                   state;
> +     struct list_head                cb_entry;
> +     int                             irqsafe;
>  #ifdef CONFIG_TIMER_STATS
>       int                             start_pid;
>       void                            *start_site;
> @@ -147,6 +149,7 @@ struct hrtimer_clock_base {
>       int                     index;
>       clockid_t               clockid;
>       struct timerqueue_head  active;
> +     struct list_head        expired;
>       ktime_t                 resolution;
>       ktime_t                 (*get_time)(void);
>       ktime_t                 softirq_time;
> --- a/kernel/hrtimer.c
> +++ b/kernel/hrtimer.c
> @@ -589,8 +589,7 @@ static int hrtimer_reprogram(struct hrti
>        * When the callback is running, we do not reprogram the clock event
>        * device. The timer callback is either running on a different CPU or
>        * the callback is executed in the hrtimer_interrupt context. The
> -      * reprogramming is handled either by the softirq, which called the
> -      * callback or at the end of the hrtimer_interrupt.
> +      * reprogramming is handled at the end of the hrtimer_interrupt.
>        */
>       if (hrtimer_callback_running(timer))
>               return 0;
> @@ -625,6 +624,9 @@ static int hrtimer_reprogram(struct hrti
>       return res;
>  }
>  
> +static void __run_hrtimer(struct hrtimer *timer, ktime_t *now);
> +static int hrtimer_rt_defer(struct hrtimer *timer);
> +
>  /*
>   * Initialize the high resolution related parts of cpu_base
>   */
> @@ -730,6 +732,11 @@ static inline int hrtimer_enqueue_reprog
>  }
>  static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
>  static inline void retrigger_next_event(void *arg) { }
> +static inline int hrtimer_reprogram(struct hrtimer *timer,
> +                                 struct hrtimer_clock_base *base)
> +{
> +     return 0;
> +}
>  
>  #endif /* CONFIG_HIGH_RES_TIMERS */
>  
> @@ -861,9 +868,9 @@ void hrtimer_wait_for_timer(const struct
>  {
>       struct hrtimer_clock_base *base = timer->base;
>  
> -     if (base && base->cpu_base && !hrtimer_hres_active(base->cpu_base))
> +     if (base && base->cpu_base && !timer->irqsafe)
>               wait_event(base->cpu_base->wait,
> -                             !(timer->state & HRTIMER_STATE_CALLBACK));
> +                        !(timer->state & HRTIMER_STATE_CALLBACK));
>  }
>  
>  #else
> @@ -913,6 +920,11 @@ static void __remove_hrtimer(struct hrti
>       if (!(timer->state & HRTIMER_STATE_ENQUEUED))
>               goto out;
>  
> +     if (unlikely(!list_empty(&timer->cb_entry))) {
> +             list_del_init(&timer->cb_entry);
> +             goto out;
> +     }
> +
>       next_timer = timerqueue_getnext(&base->active);
>       timerqueue_del(&base->active, &timer->node);
>       if (&timer->node == next_timer) {
> @@ -1011,6 +1023,26 @@ int __hrtimer_start_range_ns(struct hrti
>        */
>       if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)
>               && hrtimer_enqueue_reprogram(timer, new_base)) {
> +#ifdef CONFIG_PREEMPT_RT_BASE
> +     again:

What kernel are you working with? I don't see anywhere the "again:"
within a PREEMPT_RT_BASE block.

> +             /*
> +              * Move softirq based timers away from the rbtree in
> +              * case it expired already. Otherwise we would have a
> +              * stale base->first entry until the softirq runs.
> +              */
> +             if (!hrtimer_rt_defer(timer)) {
> +                     ktime_t now = ktime_get();
> +
> +                     __run_hrtimer(timer, &now);
> +                     /*
> +                      * __run_hrtimer might have requeued timer and
> +                      * it could be base->first again.
> +                      */
> +                     if (&timer->node == new_base->active.next &&
> +                         hrtimer_enqueue_reprogram(timer, new_base))
> +                             goto again;
> +             } else
> +#endif
>               if (wakeup) {
>                       /*
>                        * We need to drop cpu_base->lock to avoid a
> @@ -1188,6 +1220,7 @@ static void __hrtimer_init(struct hrtime
>  
>       base = hrtimer_clockid_to_base(clock_id);
>       timer->base = &cpu_base->clock_base[base];
> +     INIT_LIST_HEAD(&timer->cb_entry);
>       timerqueue_init(&timer->node);
>  
>  #ifdef CONFIG_TIMER_STATS
> @@ -1271,10 +1304,118 @@ static void __run_hrtimer(struct hrtimer
>       timer->state &= ~HRTIMER_STATE_CALLBACK;
>  }
>  
> -#ifdef CONFIG_HIGH_RES_TIMERS
> -
>  static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
>  
> +#ifdef CONFIG_PREEMPT_RT_BASE
> +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
> +                              struct hrtimer_clock_base *base)
> +{
> +     /*
> +      * Note, we clear the callback flag before we requeue the
> +      * timer otherwise we trigger the callback_running() check
> +      * in hrtimer_reprogram().
> +      */
> +     timer->state &= ~HRTIMER_STATE_CALLBACK;
> +
> +     if (restart != HRTIMER_NORESTART) {
> +             BUG_ON(hrtimer_active(timer));
> +             /*
> +              * Enqueue the timer, if it's the leftmost timer then
> +              * we need to reprogram it.
> +              */
> +             if (!enqueue_hrtimer(timer, base))
> +                     return;
> +
> +             if (hrtimer_reprogram(timer, base))
> +                     goto requeue;
> +
> +     } else if (hrtimer_active(timer)) {
> +             /*
> +              * If the timer was rearmed on another CPU, reprogram
> +              * the event device.
> +              */
> +             if (&timer->node == base->active.next &&
> +                 hrtimer_reprogram(timer, base))
> +                     goto requeue;
> +     }
> +     return;
> +
> +requeue:
> +     /*
> +      * Timer is expired. Thus move it from tree to pending list
> +      * again.
> +      */
> +     __remove_hrtimer(timer, base, timer->state, 0);
> +     list_add_tail(&timer->cb_entry, &base->expired);
> +}
> +
> +/*
> + * The changes in mainline which removed the callback modes from
> + * hrtimer are not yet working with -rt. The non wakeup_process()
> + * based callbacks which involve sleeping locks need to be treated
> + * separately.
> + */
> +static void hrtimer_rt_run_pending(void)
> +{
> +     enum hrtimer_restart (*fn)(struct hrtimer *);
> +     struct hrtimer_cpu_base *cpu_base;
> +     struct hrtimer_clock_base *base;
> +     struct hrtimer *timer;
> +     int index, restart;
> +
> +     local_irq_disable();
> +     cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
> +
> +     raw_spin_lock(&cpu_base->lock);
> +
> +     for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
> +             base = &cpu_base->clock_base[index];
> +
> +             while (!list_empty(&base->expired)) {
> +                     timer = list_first_entry(&base->expired,
> +                                              struct hrtimer, cb_entry);
> +
> +                     /*
> +                      * Same as the above __run_hrtimer function
> +                      * just we run with interrupts enabled.
> +                      */
> +                     debug_hrtimer_deactivate(timer);
> +                     __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 
> 0);
> +                     timer_stats_account_hrtimer(timer);
> +                     fn = timer->function;
> +
> +                     raw_spin_unlock_irq(&cpu_base->lock);
> +                     restart = fn(timer);
> +                     raw_spin_lock_irq(&cpu_base->lock);
> +
> +                     hrtimer_rt_reprogram(restart, timer, base);
> +             }
> +     }
> +
> +     raw_spin_unlock_irq(&cpu_base->lock);
> +
> +     wake_up_timer_waiters(cpu_base);
> +}
> +
> +static int hrtimer_rt_defer(struct hrtimer *timer)
> +{
> +     if (timer->irqsafe)
> +             return 0;
> +
> +     __remove_hrtimer(timer, timer->base, timer->state, 0);
> +     list_add_tail(&timer->cb_entry, &timer->base->expired);
> +     return 1;
> +}
> +
> +#else
> +
> +static inline void hrtimer_rt_run_pending(void) { }
> +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
> +
> +#endif
> +
> +#ifdef CONFIG_HIGH_RES_TIMERS
> +
>  /*
>   * High resolution timer interrupt
>   * Called with interrupts disabled
> @@ -1283,7 +1424,7 @@ void hrtimer_interrupt(struct clock_even
>  {
>       struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
>       ktime_t expires_next, now, entry_time, delta;
> -     int i, retries = 0;
> +     int i, retries = 0, raise = 0;
>  
>       BUG_ON(!cpu_base->hres_active);
>       cpu_base->nr_events++;
> @@ -1349,7 +1490,10 @@ retry:
>                               break;
>                       }
>  
> -                     __run_hrtimer(timer, &basenow);
> +                     if (!hrtimer_rt_defer(timer))
> +                             __run_hrtimer(timer, &basenow);
> +                     else
> +                             raise = 1;
>               }
>       }
>  
> @@ -1364,6 +1508,10 @@ retry:
>       if (expires_next.tv64 == KTIME_MAX ||
>           !tick_program_event(expires_next, 0)) {
>               cpu_base->hang_detected = 0;
> +
> +             if (raise)
> +                     raise_softirq_irqoff(HRTIMER_SOFTIRQ);
> +
>               return;
>       }
>  
> @@ -1444,6 +1592,12 @@ void hrtimer_peek_ahead_timers(void)
>       local_irq_restore(flags);
>  }
>  
> +#else /* CONFIG_HIGH_RES_TIMERS */
> +
> +static inline void __hrtimer_peek_ahead_timers(void) { }
> +
> +#endif       /* !CONFIG_HIGH_RES_TIMERS */
> +
>  static void run_hrtimer_softirq(struct softirq_action *h)
>  {
>       struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
> @@ -1453,15 +1607,9 @@ static void run_hrtimer_softirq(struct s
>               clock_was_set();
>       }
>  
> -     hrtimer_peek_ahead_timers();
> +     hrtimer_rt_run_pending();
>  }
>  
> -#else /* CONFIG_HIGH_RES_TIMERS */
> -
> -static inline void __hrtimer_peek_ahead_timers(void) { }
> -
> -#endif       /* !CONFIG_HIGH_RES_TIMERS */
> -
>  /*
>   * Called from timer softirq every jiffy, expire hrtimers:
>   *
> @@ -1494,7 +1642,7 @@ void hrtimer_run_queues(void)
>       struct timerqueue_node *node;
>       struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
>       struct hrtimer_clock_base *base;
> -     int index, gettime = 1;
> +     int index, gettime = 1, raise = 0;
>  
>       if (hrtimer_hres_active())
>               return;
> @@ -1519,12 +1667,16 @@ void hrtimer_run_queues(void)
>                                       hrtimer_get_expires_tv64(timer))
>                               break;
>  
> -                     __run_hrtimer(timer, &base->softirq_time);
> +                     if (!hrtimer_rt_defer(timer))
> +                             __run_hrtimer(timer, &base->softirq_time);
> +                     else
> +                             raise = 1;
>               }
>               raw_spin_unlock(&cpu_base->lock);
>       }
>  
> -     wake_up_timer_waiters(cpu_base);
> +     if (raise)
> +             raise_softirq_irqoff(HRTIMER_SOFTIRQ);
>  }
>  
>  /*
> @@ -1546,6 +1698,7 @@ static enum hrtimer_restart hrtimer_wake
>  void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct 
> *task)
>  {
>       sl->timer.function = hrtimer_wakeup;
> +     sl->timer.irqsafe = 1;
>       sl->task = task;
>  }
>  EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
> @@ -1684,6 +1837,7 @@ static void __cpuinit init_hrtimers_cpu(
>       for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
>               cpu_base->clock_base[i].cpu_base = cpu_base;
>               timerqueue_init_head(&cpu_base->clock_base[i].active);
> +             INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
>       }
>  
>       hrtimer_init_hres(cpu_base);
> @@ -1802,9 +1956,7 @@ void __init hrtimers_init(void)
>       hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
>                         (void *)(long)smp_processor_id());
>       register_cpu_notifier(&hrtimers_nb);
> -#ifdef CONFIG_HIGH_RES_TIMERS
>       open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
> -#endif
>  }
>  
>  /**
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -189,6 +189,7 @@ void init_rt_bandwidth(struct rt_bandwid
>  
>       hrtimer_init(&rt_b->rt_period_timer,
>                       CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +     rt_b->rt_period_timer.irqsafe = 1;
>       rt_b->rt_period_timer.function = sched_rt_period_timer;
>  }
>  
> @@ -1274,6 +1275,7 @@ static void init_rq_hrtick(struct rq *rq
>  
>       hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>       rq->hrtick_timer.function = hrtick;
> +     rq->hrtick_timer.irqsafe = 1;
>  }
>  #else        /* CONFIG_SCHED_HRTICK */
>  static inline void hrtick_clear(struct rq *rq)
> --- a/kernel/time/tick-sched.c
> +++ b/kernel/time/tick-sched.c
> @@ -805,6 +805,7 @@ void tick_setup_sched_timer(void)
>        * Emulate tick processing via per-CPU hrtimers:
>        */
>       hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
> +     ts->sched_timer.irqsafe = 1;
>       ts->sched_timer.function = tick_sched_timer;
>  
>       /* Get the next period (per cpu) */
> --- a/kernel/watchdog.c
> +++ b/kernel/watchdog.c
> @@ -436,6 +436,7 @@ static void watchdog_prepare_cpu(int cpu
>       WARN_ON(per_cpu(softlockup_watchdog, cpu));
>       hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>       hrtimer->function = watchdog_timer_fn;
> +     hrtimer->irqsafe = 1;
>  }
>  
>  static int watchdog_enable(int cpu)
> ---
> From: Peter Zijlstra <a.p.zijls...@chello.nl>
> Date: Fri, 12 Aug 2011 17:39:54 +0200
> Subject: hrtimer: Don't call the timer handler from hrtimer_start
> 
>  [<ffffffff812de4a9>] __delay+0xf/0x11
>  [<ffffffff812e36e9>] do_raw_spin_lock+0xd2/0x13c
>  [<ffffffff815028ee>] _raw_spin_lock+0x60/0x73              
> rt_b->rt_runtime_lock
>  [<ffffffff81068f68>] ? sched_rt_period_timer+0xad/0x281
>  [<ffffffff81068f68>] sched_rt_period_timer+0xad/0x281
>  [<ffffffff8109e5e1>] __run_hrtimer+0x1e4/0x347
>  [<ffffffff81068ebb>] ? enqueue_rt_entity+0x36/0x36
>  [<ffffffff8109f2b1>] __hrtimer_start_range_ns+0x2b5/0x40a  
> base->cpu_base->lock  (lock_hrtimer_base)
>  [<ffffffff81068b6f>] __enqueue_rt_entity+0x26f/0x2aa       
> rt_b->rt_runtime_lock (start_rt_bandwidth)
>  [<ffffffff81068ead>] enqueue_rt_entity+0x28/0x36
>  [<ffffffff81069355>] enqueue_task_rt+0x3d/0xb0
>  [<ffffffff810679d6>] enqueue_task+0x5d/0x64
>  [<ffffffff810714fc>] task_setprio+0x210/0x29c              rq->lock
>  [<ffffffff810b56cb>] __rt_mutex_adjust_prio+0x25/0x2a      p->pi_lock
>  [<ffffffff810b5d2c>] task_blocks_on_rt_mutex+0x196/0x20f
> 
> Instead make __hrtimer_start_range_ns() return -ETIME when the timer
> is in the past. Since nobody actually uses the hrtimer_start*() return
> value it's pretty safe to wreck it.
> 
> Also, it will only ever return -ETIME for timer->irqsafe || !wakeup
> timers.
> 
> Signed-off-by: Peter Zijlstra <a.p.zijls...@chello.nl>
> [bwh: Pull the changes to hrtimer_enqueue_reprogram() up into
>  __hrtimer_start_range_ns(), following changes in
>  commit b22affe0aef4 'hrtimer: Prevent hrtimer_enqueue_reprogram race'
>  backported into 3.2.40]
> Signed-off-by: Ben Hutchings <b...@decadent.org.uk>
> ---
> --- a/kernel/hrtimer.c
> +++ b/kernel/hrtimer.c
> @@ -1023,37 +1023,31 @@ int __hrtimer_start_range_ns(struct hrti
>        */
>       if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)
>               && hrtimer_enqueue_reprogram(timer, new_base)) {
> -#ifdef CONFIG_PREEMPT_RT_BASE
> -     again:

Never mind, I see you applied two patches.

>               /*
>                * Move softirq based timers away from the rbtree in
>                * case it expired already. Otherwise we would have a
>                * stale base->first entry until the softirq runs.
>                */
> -             if (!hrtimer_rt_defer(timer)) {
> -                     ktime_t now = ktime_get();
> -
> -                     __run_hrtimer(timer, &now);
> -                     /*
> -                      * __run_hrtimer might have requeued timer and
> -                      * it could be base->first again.
> -                      */
> -                     if (&timer->node == new_base->active.next &&
> -                         hrtimer_enqueue_reprogram(timer, new_base))
> -                             goto again;
> -             } else
> +             if (wakeup
> +#ifdef CONFIG_PREEMPT_RT_BASE
> +                 && hrtimer_rt_defer(timer)
>  #endif

This is very similar to what I came up with.

> -             if (wakeup) {
> -                     /*
> -                      * We need to drop cpu_base->lock to avoid a
> -                      * lock ordering issue vs. rq->lock.
> -                      */
> +                 ) {
>                       raw_spin_unlock(&new_base->cpu_base->lock);
>                       raise_softirq_irqoff(HRTIMER_SOFTIRQ);
>                       local_irq_restore(flags);
> -                     return ret;
> +                     return 0;
>               } else {
> -                     __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
> +                     ret = -ETIME;
> +
> +                     /*
> +                      * In case we failed to reprogram the timer (mostly
> +                      * because out current timer is already elapsed),
> +                      * remove it again and report a failure. This avoids
> +                      * stale base->first entries.
> +                      */
> +                     __remove_hrtimer(timer, new_base,
> +                                     timer->state & HRTIMER_STATE_CALLBACK, 
> 0);

Yep, looks like we are on the right track  :-)

>               }
>       }
>  
> ---
> 
> Note, the #ifdef in the second patch can't be removed, as the !RT
> definition of hrtimer_rt_defer() returns 0 whereas we want it to be true
> here.  (At least, that seems to match the logic of the previous version
> of the patch.)

Yep. That's what I'm waiting to hear too.

-- Steve




-- 
To UNSUBSCRIBE, email to debian-kernel-requ...@lists.debian.org
with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org
Archive: http://lists.debian.org/1363919473.6345.88.ca...@gandalf.local.home

Reply via email to