On -RT it is possible that an invocation of timer_set() or
timer_delete() preempts an itimer which results in TIMER_RETRY. We can't
retry immediately because if the process preempted the softirq then it
has a higher priority and will loop and retry forever.

As a workaround for -RT the "retry" thread waited for the hrtimer to
complete (hrtimer_wait_for_timer()) or left the CPU via
schedule_timeout() in case of CPU-timer.
During hrtimer_wait_for_timer() the timer could be removed (and memory
freed) which would result in use-after-free for the waiter (in
hrtimer_wait_for_timer()). Therefore the RCU read section has been
extended to cover the whole period while the thread is scheduled out.
This change is part of 8df8ef7b2b9b ("hrtimers: Prepare full
preemption") in the RT patch.

This behaviour ("sleeping" while holding the RCU lock) is reported since
commit 5b72f9643b52 ("rcu: Complain if blocking in preemptible RCU
read-side critical section") and both Daniel Wagner and Grygorii
Strashko noticed [0] it.

I revert the RCU read section to what we had before. I am adding a
`kref' object to avoid removal of the itimer (incl. the hrtimer) while
there is someone waiting on it. This looks simple in timer_settime() and
timer_delete(). We had one lookup, hold the lock, increment ref-count.
If timer is deleted the waiter does the final put and the following RCU
lookup will fail.

I don't understand why the RCU lock is also held in itimer_delete(). At
this point, the process is dead (even exit_signals() was invoked) and
there is nothing that can remove that timer. timer_delete() and
timer_settime() always lookups the timer via its id during the retry.
This is not the case for itimer_delete() which gets a struct k_itimer
handed over. The comment says that "on RT it might race against
deletion" but I have no idea where it might come from.
While I understand why timer_delete() sets
        timer->it_signal = NULL
I don't know why itimer_delete() does the same. At this point there
should be no lookup or removal happen via RCU. Also ->list is protected
via sighand->siglock which is not held at this point. I *think* this was
just copied from the above (timer_delete()) while the timer were
accidently per-thread and fixed in commit 0e568881178f ("fix
posix-timers to have proper per-process scope").

[0] https://lkml.kernel.org/r/ec8b4367-ef5b-5446-2cd0-9f8f7fd69...@monom.org
[1] 
https://git.kernel.org/history/history/c/0e568881178ff0e0aceeafdb51f9fecab39e1923

Signed-off-by: Sebastian Andrzej Siewior <bige...@linutronix.de>
---
 include/linux/posix-timers.h |  1 +
 kernel/time/posix-timers.c   | 41 +++++++++++++++++++++++++----------------
 2 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 672c4f32311e..b4cac3d8da1b 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -78,6 +78,7 @@ struct k_itimer {
        struct list_head        list;
        struct hlist_node       t_hash;
        spinlock_t              it_lock;
+       struct kref             kref;
        const struct k_clock    *kclock;
        clockid_t               it_clock;
        timer_t                 it_id;
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 0b568087bd64..77fdd050016d 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -465,6 +465,7 @@ static struct k_itimer * alloc_posix_timer(void)
                return NULL;
        }
        memset(&tmr->sigq->info, 0, sizeof(siginfo_t));
+       kref_init(&tmr->kref);
        return tmr;
 }
 
@@ -475,6 +476,23 @@ static void k_itimer_rcu_free(struct rcu_head *head)
        kmem_cache_free(posix_timers_cache, tmr);
 }
 
+static void kitimer_get(struct k_itimer *timer)
+{
+       kref_get(&timer->kref);
+}
+
+static void kitimer_release(struct kref *kref)
+{
+       struct k_itimer *tmr = container_of(kref, struct k_itimer, kref);
+
+       call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
+}
+
+static void kitimer_put(struct k_itimer *timer)
+{
+       kref_put(&timer->kref, kitimer_release);
+}
+
 #define IT_ID_SET      1
 #define IT_ID_NOT_SET  0
 static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
@@ -487,7 +505,7 @@ static void release_posix_timer(struct k_itimer *tmr, int 
it_id_set)
        }
        put_pid(tmr->it_pid);
        sigqueue_free(tmr->sigq);
-       call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
+       kitimer_put(tmr);
 }
 
 static int common_timer_create(struct k_itimer *new_timer)
@@ -904,7 +922,7 @@ static int do_timer_settime(timer_t timer_id, int flags,
        if (!timr)
                return -EINVAL;
 
-       rcu_read_lock();
+       kitimer_get(timr);
        kc = timr->kclock;
        if (WARN_ON_ONCE(!kc || !kc->timer_set))
                error = -EINVAL;
@@ -914,12 +932,11 @@ static int do_timer_settime(timer_t timer_id, int flags,
        unlock_timer(timr, flag);
        if (error == TIMER_RETRY) {
                timer_wait_for_callback(kc, timr);
+               kitimer_put(timr);
                old_spec64 = NULL;      // We already got the old time...
-               rcu_read_unlock();
                goto retry;
        }
-       rcu_read_unlock();
-
+       kitimer_put(timr);
        return error;
 }
 
@@ -1000,15 +1017,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
        if (!timer)
                return -EINVAL;
 
-       rcu_read_lock();
        if (timer_delete_hook(timer) == TIMER_RETRY) {
+               kitimer_get(timer);
                unlock_timer(timer, flags);
+
                timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
                                        timer);
-               rcu_read_unlock();
+               kitimer_put(timer);
                goto retry_delete;
        }
-       rcu_read_unlock();
 
        spin_lock(&current->sighand->siglock);
        list_del(&timer->list);
@@ -1034,18 +1051,10 @@ static void itimer_delete(struct k_itimer *timer)
 retry_delete:
        spin_lock_irqsave(&timer->it_lock, flags);
 
-       /* On RT we can race with a deletion */
-       if (!timer->it_signal) {
-               unlock_timer(timer, flags);
-               return;
-       }
-
        if (timer_delete_hook(timer) == TIMER_RETRY) {
-               rcu_read_lock();
                unlock_timer(timer, flags);
                timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
                                        timer);
-               rcu_read_unlock();
                goto retry_delete;
        }
        list_del(&timer->list);
-- 
2.16.2

Reply via email to