So I had a look at this yesterday and came up with the below --
completely untested etc.

Now, in order to compile-test it I meant to convert the completion code and
ran head first into complete_all(); it uses spin_lock_irqsave(), which
means it can be used from IRQ context. Now if you look at
__swake_up_all() you'll find a comment explaining why we cannot allow that.

Now I can't remember how important that all was for RT but I figured I'd
post it and let other people stare at it for a bit.

---
 include/linux/swait.h |  181 ++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/swait.c  |  162 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 343 insertions(+)

--- /dev/null
+++ b/include/linux/swait.h
@@ -0,0 +1,181 @@
+#ifndef _LINUX_SWAIT_H
+#define _LINUX_SWAIT_H
+
+#include <linux/list.h>
+#include <linux/stddef.h>
+#include <linux/spinlock.h>
+#include <asm/current.h>
+
+/*
+ * Simple wait queues
+ *
+ * While these are very similar to the other/complex wait queues (wait.h) the
+ * most important difference is that the simple waitqueue allows for
+ * deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold
+ * times.
+ *
+ * In order to make this so, we had to drop a fair number of features of the
+ * other waitqueue code; notably:
+ *
+ *  - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue
+ *
+ *  - the exclusive mode; because this requires preserving the list order
+ *    and this is hard, see __swake_up() and __swake_up_all().
+ *
+ *  - custom wake functions; because you cannot give any guarantees about
+ *    random code.
+ *
+ * As a side effect of this; the data structures are slimmer.
+ *
+ * One would recommend using this wait queue where possible.
+ */
+
+struct task_struct;
+
+/*
+ * Head of a simple wait queue: a raw spinlock plus the list of waiters.
+ */
+struct swait_queue_head {
+       raw_spinlock_t          lock;
+#ifdef CONFIG_SWAIT_DEBUG
+       /* sleep state recorded by the first waiter; 0 means none recorded yet */
+       unsigned int            state;
+#endif
+       /* list of struct swait_queue entries, linked through their task_list */
+       struct list_head        task_list;
+};
+
+/* One waiter: the task to wake and its link into a swait_queue_head list. */
+struct swait_queue {
+       struct task_struct      *task;
+       struct list_head        task_list;
+};
+
+/*
+ * Initializer for a waiter entry: the waiting task is 'current' and the
+ * list node starts out empty (pointing at itself), i.e. not yet queued.
+ */
+#define __SWAITQUEUE_INITIALIZER(name) {                               \
+       .task           = current,                                      \
+       .task_list      = LIST_HEAD_INIT((name).task_list),             \
+}
+
+/* Declare and initialize a struct swait_queue called @name. */
+#define DECLARE_SWAITQUEUE(name)                                       \
+       struct swait_queue name = __SWAITQUEUE_INITIALIZER(name)
+
+/*
+ * Designated initializer fragment for the debug-only 'state' member of
+ * struct swait_queue_head. The member only exists with CONFIG_SWAIT_DEBUG,
+ * so the fragment must expand to nothing otherwise.
+ */
+#ifdef CONFIG_SWAIT_DEBUG
+# define __SWAIT_QUEUE_HEAD_DEBUG_INIT()       .state = 0,
+#else
+# define __SWAIT_QUEUE_HEAD_DEBUG_INIT()
+#endif
+
+/*
+ * Static initializer for a swait_queue_head: an unlocked raw spinlock, an
+ * empty waiter list, plus whatever __SWAIT_QUEUE_HEAD_DEBUG_INIT() expands to.
+ */
+#define __SWAIT_QUEUE_HEAD_INITIALIZER(name) {                         \
+       .lock           = __RAW_SPIN_LOCK_UNLOCKED(name.lock),          \
+       .task_list      = LIST_HEAD_INIT((name).task_list),             \
+       __SWAIT_QUEUE_HEAD_DEBUG_INIT()                                 \
+}
+
+/* Declare and statically initialize a swait_queue_head called @name. */
+#define DECLARE_SWAIT_QUEUE_HEAD(name)                                 \
+       struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INITIALIZER(name)
+
+/*
+ * Runtime initializer for a swait_queue_head. @name and @key are used to
+ * set the lockdep class and name of the head's lock.
+ */
+extern void __init_swait_queue_head(struct swait_queue_head *q,
+                                   const char *name,
+                                   struct lock_class_key *key);
+
+/*
+ * Initialize the queue head @q at runtime. Each expansion site gets its own
+ * static lock_class_key, and the stringified argument is used as the name.
+ */
+#define init_swait_queue_head(q)                               \
+       do {                                                    \
+               static struct lock_class_key __key;             \
+               __init_swait_queue_head((q), #q, &__key);       \
+       } while (0)
+
+/*
+ * Declare a swait_queue_head with automatic storage.
+ *
+ * With CONFIG_LOCKDEP the head is set up through init_swait_queue_head()
+ * (which supplies a lock class key) instead of the plain static initializer;
+ * without it this is the same as DECLARE_SWAIT_QUEUE_HEAD().
+ */
+#ifdef CONFIG_LOCKDEP
+# define __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)                 \
+       ({ init_swait_queue_head(&name); name; })
+# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name)                        \
+       struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name)
+#else
+# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name)                        \
+       DECLARE_SWAIT_QUEUE_HEAD(name)
+#endif
+
+/*
+ * Return non-zero when at least one waiter is queued on @q.
+ *
+ * This reads the list without taking q->lock.
+ * NOTE(review): the result can presumably be stale by the time the caller
+ * acts on it -- confirm what ordering callers have to provide.
+ */
+static inline int swait_active(struct swait_queue_head *q)
+{
+       return !list_empty(&q->task_list);
+}
+
+/*
+ * Wake-up side. __swake_up() and __swake_up_locked() wake the first queued
+ * waiter, __swake_up_all() wakes every queued waiter. The _locked variant
+ * does not take q->lock itself.
+ */
+extern void __swake_up(struct swait_queue_head *q, unsigned int mode);
+extern void __swake_up_all(struct swait_queue_head *q, unsigned int mode);
+extern void __swake_up_locked(struct swait_queue_head *q, unsigned int mode);
+
+#define swake_up(x)                    __swake_up((x), TASK_NORMAL)
+#define swake_up_all(x)                        __swake_up_all((x), TASK_NORMAL)
+#define swake_up_locked(x)             __swake_up_locked((x), TASK_NORMAL)
+
+/*
+ * Wait side. The double-underscore variants only manipulate the list and
+ * do not take q->lock; the plain variants take q->lock themselves.
+ */
+extern void __prepare_to_swait(struct swait_queue_head *q,
+                              struct swait_queue *wait);
+extern void prepare_to_swait(struct swait_queue_head *q,
+                            struct swait_queue *wait, int state);
+extern long prepare_to_swait_event(struct swait_queue_head *q,
+                                  struct swait_queue *wait, int state);
+
+extern void __finish_swait(struct swait_queue_head *q,
+                          struct swait_queue *wait);
+extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
+
+/*
+ * As per ___wait_event() but for swait, therefore "exclusive == 0".
+ *
+ * Core wait loop shared by the swait_event*() macros:
+ *  @wq:        the swait_queue_head itself (its address is taken here)
+ *  @condition: expression re-evaluated after every (re)queueing
+ *  @state:     task state to sleep in
+ *  @ret:       initial value of the result, __ret
+ *  @cmd:       statement that actually sleeps; it may assign to __ret
+ *
+ * The waiter is queued via prepare_to_swait_event() before @condition is
+ * tested on every iteration, and dequeued by finish_swait() on exit. If
+ * prepare_to_swait_event() returns non-zero for an interruptible @state,
+ * that value becomes the result; otherwise the result is __ret.
+ */
+#define ___swait_event(wq, condition, state, ret, cmd)                 \
+({                                                                     \
+       struct swait_queue __wait;                                      \
+       long __ret = ret;                                               \
+                                                                       \
+       INIT_LIST_HEAD(&__wait.task_list);                              \
+       for (;;) {                                                      \
+               long __int = prepare_to_swait_event(&wq, &__wait, state);\
+                                                                       \
+               if (condition)                                          \
+                       break;                                          \
+                                                                       \
+               if (___wait_is_interruptible(state) && __int) {         \
+                       __ret = __int;                                  \
+                       break;                                          \
+               }                                                       \
+                                                                       \
+               cmd;                                                    \
+       }                                                               \
+       finish_swait(&wq, &__wait);                                     \
+       __ret;                                                          \
+})
+
+/* Slow path of swait_event(): sleep in TASK_UNINTERRUPTIBLE via schedule(). */
+#define __swait_event(wq, condition)                                   \
+       (void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,    \
+                           schedule())
+
+/*
+ * Sleep uninterruptibly on @wq until @condition is true. @condition is
+ * tested first, so nothing is queued when it already holds.
+ */
+#define swait_event(wq, condition)                                     \
+do {                                                                   \
+       if (condition)                                                  \
+               break;                                                  \
+       __swait_event(wq, condition);                                   \
+} while (0)
+
+/*
+ * Slow path of swait_event_timeout(): sleep in TASK_UNINTERRUPTIBLE using
+ * schedule_timeout(), feeding its return value back into __ret.
+ */
+#define __swait_event_timeout(wq, condition, timeout)                  \
+       ___swait_event(wq, ___wait_cond_timeout(condition),             \
+                     TASK_UNINTERRUPTIBLE, timeout,                    \
+                     __ret = schedule_timeout(__ret))
+
+/*
+ * Sleep uninterruptibly on @wq until @condition is true or @timeout runs
+ * out. Evaluates to __ret, which starts as @timeout.
+ * NOTE(review): ___wait_cond_timeout() is not defined in this header; it
+ * presumably comes from <linux/wait.h> and reads/updates __ret -- confirm.
+ */
+#define swait_event_timeout(wq, condition, timeout)                    \
+({                                                                     \
+       long __ret = timeout;                                           \
+       if (!___wait_cond_timeout(condition))                           \
+               __ret = __swait_event_timeout(wq, condition, timeout);  \
+       __ret;                                                          \
+})
+
+/* Slow path of swait_event_interruptible(): TASK_INTERRUPTIBLE + schedule(). */
+#define __swait_event_interruptible(wq, condition)                     \
+       ___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0,            \
+                     schedule())
+
+/*
+ * Sleep interruptibly on @wq until @condition is true. Evaluates to 0 when
+ * @condition became true, or to the non-zero value ___swait_event() produced
+ * when prepare_to_swait_event() reported a pending signal.
+ */
+#define swait_event_interruptible(wq, condition)                       \
+({                                                                     \
+       int __ret = 0;                                                  \
+       if (!(condition))                                               \
+               __ret = __swait_event_interruptible(wq, condition);     \
+       __ret;                                                          \
+})
+
+/*
+ * Slow path of swait_event_interruptible_timeout(): sleep in
+ * TASK_INTERRUPTIBLE using schedule_timeout(), feeding its return value
+ * back into __ret.
+ */
+#define __swait_event_interruptible_timeout(wq, condition, timeout)    \
+       ___swait_event(wq, ___wait_cond_timeout(condition),             \
+                     TASK_INTERRUPTIBLE, timeout,                      \
+                     __ret = schedule_timeout(__ret))
+
+/*
+ * Sleep interruptibly on @wq until @condition is true or @timeout runs out.
+ * Evaluates to __ret, which starts as @timeout.
+ * NOTE(review): ___wait_cond_timeout() is not defined in this header; it
+ * presumably comes from <linux/wait.h> and reads/updates __ret -- confirm.
+ */
+#define swait_event_interruptible_timeout(wq, condition, timeout)      \
+({                                                                     \
+       long __ret = timeout;                                           \
+       if (!___wait_cond_timeout(condition))                           \
+               __ret = __swait_event_interruptible_timeout(wq,         \
+                                               condition, timeout);    \
+       __ret;                                                          \
+})
+
+#endif /* _LINUX_SWAIT_H */
--- /dev/null
+++ b/kernel/sched/swait.c
@@ -0,0 +1,162 @@
+
+#include <linux/swait.h>
+
+/*
+ * Set up a simple wait queue head at runtime: empty waiter list, fresh raw
+ * spinlock with the lockdep class @key and name @name, and (debug builds
+ * only) no recorded sleep state.
+ */
+void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
+                            struct lock_class_key *key)
+{
+#ifdef CONFIG_SWAIT_DEBUG
+       q->state = 0;
+#endif
+       INIT_LIST_HEAD(&q->task_list);
+
+       /* The lock must be initialised before its lockdep class is replaced. */
+       raw_spin_lock_init(&q->lock);
+       lockdep_set_class_and_name(&q->lock, key, name);
+}
+EXPORT_SYMBOL(__init_swait_queue_head);
+
+#ifdef CONFIG_SWAIT_DEBUG
+/*
+ * Ensure we do not mix and match INTERRUPTIBLE and UNINTERRUPTIBLE sleeps.
+ * This guarantees wakeups are always valid and we need not go look for
+ * wakeup targets, this ensures __swake_up() is O(1).
+ */
+
+/*
+ * Wake-up side check: once a waiter has recorded a sleep state on @q, warn
+ * (once) if the wake-up mask @state does not cover it.
+ */
+static inline void __swait_wakeup_debug(struct swait_queue_head *q,
+                                       unsigned int state)
+{
+       if (q->state == 0)
+               return;
+
+       WARN_ON_ONCE(!(q->state & state));
+}
+
+/*
+ * Wait side check: the first waiter records its sleep state in @q; warn
+ * (once) if a later waiter sleeps in a different state.
+ */
+static inline void __swait_wait_debug(struct swait_queue_head *q,
+                                     unsigned int state)
+{
+       if (q->state == 0)
+               q->state = state;
+
+       WARN_ON_ONCE(q->state != state);
+}
+#else
+/* Debug checks compiled out: both helpers are empty. */
+static inline void __swait_wakeup_debug(struct swait_queue_head *q,
+                                       unsigned int state)
+{
+}
+
+static inline void __swait_wait_debug(struct swait_queue_head *q,
+                                     unsigned int state)
+{
+}
+#endif
+
+/*
+ * Wake the first waiter queued on @q, if any, and take it off the list.
+ * This function does not take q->lock itself.
+ *
+ * The wake_up_state() return value is ignored on purpose: the thinking is
+ * that a return of 0 means the previously waiting task is already running,
+ * so it will observe the condition being true (or already has).
+ */
+void __swake_up_locked(struct swait_queue_head *q, unsigned int state)
+{
+       struct swait_queue *first;
+
+       __swait_wakeup_debug(q, state);
+
+       if (list_empty(&q->task_list))
+               return;
+
+       first = list_first_entry(&q->task_list, typeof(*first), task_list);
+       wake_up_state(first->task, state);
+       list_del_init(&first->task_list);
+}
+EXPORT_SYMBOL(__swake_up_locked);
+
+/*
+ * Wake the first waiter queued on @q, taking q->lock (IRQ-safe) around the
+ * actual wake-up.
+ */
+void __swake_up(struct swait_queue_head *q, unsigned int state)
+{
+       unsigned long flags;
+
+       __swait_wakeup_debug(q, state);
+
+       /*
+        * Lockless fast path: nothing queued, nothing to do.
+        * NOTE(review): this check is made without q->lock held; callers
+        * presumably must order their condition update against it -- confirm.
+        */
+       if (!swait_active(q))
+               return;
+
+       raw_spin_lock_irqsave(&q->lock, flags);
+       __swake_up_locked(q, state);
+       raw_spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(__swake_up);
+
+/*
+ * Wake every waiter that is queued on @q when the lock is first taken.
+ *
+ * Does not allow usage from IRQ disabled, since we must be able to
+ * release IRQs to guarantee bounded hold time.
+ */
+void __swake_up_all(struct swait_queue_head *q, unsigned int state)
+{
+       struct swait_queue *curr;
+       LIST_HEAD(tmp);
+
+       __swait_wakeup_debug(q, state);
+
+       /* Lockless fast path: nothing queued, nothing to do. */
+       if (!swait_active(q))
+               return;
+
+       raw_spin_lock_irq(&q->lock);
+       /* Move all current waiters to a private list; @q is left empty. */
+       list_splice_init(&q->task_list, &tmp);
+       while (!list_empty(&tmp)) {
+               /* list_first_entry() takes the struct type, not a pointer type */
+               curr = list_first_entry(&tmp, typeof(*curr), task_list);
+
+               wake_up_state(curr->task, state);
+               list_del_init(&curr->task_list);
+
+               if (list_empty(&tmp))
+                       break;
+
+               /* Drop and retake the lock so IRQs get a window per waiter. */
+               raw_spin_unlock_irq(&q->lock);
+               raw_spin_lock_irq(&q->lock);
+       }
+       raw_spin_unlock_irq(&q->lock);
+}
+EXPORT_SYMBOL(__swake_up_all);
+
+/*
+ * Queue @wait on @q on behalf of the current task, unless it is already on
+ * a list. This function does not take q->lock itself.
+ */
+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
+{
+       wait->task = current;
+       /* struct swait_queue links through task_list; it has no 'node' member */
+       if (list_empty(&wait->task_list))
+               list_add(&wait->task_list, &q->task_list);
+}
+
+/*
+ * Queue @wait on @q and put the current task into @state, both done while
+ * holding q->lock with interrupts disabled.
+ */
+void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait,
+                     int state)
+{
+       unsigned long flags;
+
+       __swait_wait_debug(q, state);
+
+       raw_spin_lock_irqsave(&q->lock, flags);
+       __prepare_to_swait(q, wait);
+       set_current_state(state);
+       raw_spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_swait);
+
+/*
+ * Like prepare_to_swait(), but first checks for a pending signal.
+ *
+ * Returns -ERESTARTSYS, without queueing @wait, when signal_pending_state()
+ * reports a signal for @state; otherwise queues @wait and returns 0.
+ */
+long prepare_to_swait_event(struct swait_queue_head *q,
+                           struct swait_queue *wait, int state)
+{
+       if (signal_pending_state(state, current))
+               return -ERESTARTSYS;
+
+       prepare_to_swait(q, wait, state);
+
+       return 0;
+}
+EXPORT_SYMBOL(prepare_to_swait_event);
+
+/*
+ * Mark the current task TASK_RUNNING and unlink @wait if it is still on a
+ * list. This function does not take q->lock itself.
+ */
+void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
+{
+       __set_current_state(TASK_RUNNING);
+
+       if (list_empty(&wait->task_list))
+               return;
+
+       list_del_init(&wait->task_list);
+}
+
+/*
+ * Mark the current task TASK_RUNNING and, if @wait still appears to be
+ * queued, unlink it under q->lock with interrupts disabled.
+ */
+void finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
+{
+       unsigned long irqflags;
+
+       __set_current_state(TASK_RUNNING);
+
+       /* Checked without the lock; skip locking when already dequeued. */
+       if (list_empty_careful(&wait->task_list))
+               return;
+
+       raw_spin_lock_irqsave(&q->lock, irqflags);
+       list_del_init(&wait->task_list);
+       raw_spin_unlock_irqrestore(&q->lock, irqflags);
+}
+EXPORT_SYMBOL(finish_swait);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to