Commit-ID:  9cff8adeaa34b5d2802f03f89803da57856b3b72
Gitweb:     http://git.kernel.org/tip/9cff8adeaa34b5d2802f03f89803da57856b3b72
Author:     NeilBrown <ne...@suse.de>
AuthorDate: Fri, 13 Feb 2015 15:49:17 +1100
Committer:  Ingo Molnar <mi...@kernel.org>
CommitDate: Wed, 18 Feb 2015 14:27:44 +0100

sched: Prevent recursion in io_schedule()

io_schedule() calls blk_flush_plug() which, depending on the
contents of current->plug, can initiate arbitrary blk-io requests.

Note that this contrasts with blk_schedule_flush_plug() which requires
all non-trivial work to be handed off to a separate thread.

This makes it possible for io_schedule() to recurse, and initiating
block requests could possibly call mempool_alloc() which, in times of
memory pressure, uses io_schedule().

Apart from any stack usage issues, io_schedule() will not behave
correctly when called recursively as delayacct_blkio_start() does
not allow for repeated calls.

So:
 - use ->in_iowait to detect recursion.  Set it earlier, and restore
   it to the old value.
 - move the call to "raw_rq" after the call to blk_flush_plug().
   As this is some sort of per-cpu thing, we want some chance that
   we are on the right CPU
 - When io_schedule() is called recurively, use blk_schedule_flush_plug()
   which cannot further recurse.
 - as this makes io_schedule() a lot more complex and as io_schedule()
   must match io_schedule_timeout(), but all the changes in 
io_schedule_timeout()
   and make io_schedule a simple wrapper for that.

Signed-off-by: NeilBrown <ne...@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
[ Moved the now rudimentary io_schedule() into sched.h. ]
Cc: Jens Axboe <ax...@kernel.dk>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Tony Battersby <to...@cybernetics.com>
Link: http://lkml.kernel.org/r/20150213162600.059fffb2@notabene.brown
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
 include/linux/sched.h | 10 +++++++---
 kernel/sched/core.c   | 31 ++++++++++++-------------------
 2 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8db31ef..cb5cdc7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -363,9 +363,6 @@ extern void show_regs(struct pt_regs *);
  */
 extern void show_stack(struct task_struct *task, unsigned long *sp);
 
-void io_schedule(void);
-long io_schedule_timeout(long timeout);
-
 extern void cpu_init (void);
 extern void trap_init(void);
 extern void update_process_times(int user);
@@ -422,6 +419,13 @@ extern signed long schedule_timeout_uninterruptible(signed 
long timeout);
 asmlinkage void schedule(void);
 extern void schedule_preempt_disabled(void);
 
+extern long io_schedule_timeout(long timeout);
+
+static inline void io_schedule(void)
+{
+       io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
+}
+
 struct nsproxy;
 struct user_namespace;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c314000..daaea92 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4358,36 +4358,29 @@ EXPORT_SYMBOL_GPL(yield_to);
  * This task is about to go to sleep on IO. Increment rq->nr_iowait so
  * that process accounting knows that this is a task in IO wait state.
  */
-void __sched io_schedule(void)
-{
-       struct rq *rq = raw_rq();
-
-       delayacct_blkio_start();
-       atomic_inc(&rq->nr_iowait);
-       blk_flush_plug(current);
-       current->in_iowait = 1;
-       schedule();
-       current->in_iowait = 0;
-       atomic_dec(&rq->nr_iowait);
-       delayacct_blkio_end();
-}
-EXPORT_SYMBOL(io_schedule);
-
 long __sched io_schedule_timeout(long timeout)
 {
-       struct rq *rq = raw_rq();
+       int old_iowait = current->in_iowait;
+       struct rq *rq;
        long ret;
 
+       current->in_iowait = 1;
+       if (old_iowait)
+               blk_schedule_flush_plug(current);
+       else
+               blk_flush_plug(current);
+
        delayacct_blkio_start();
+       rq = raw_rq();
        atomic_inc(&rq->nr_iowait);
-       blk_flush_plug(current);
-       current->in_iowait = 1;
        ret = schedule_timeout(timeout);
-       current->in_iowait = 0;
+       current->in_iowait = old_iowait;
        atomic_dec(&rq->nr_iowait);
        delayacct_blkio_end();
+
        return ret;
 }
+EXPORT_SYMBOL(io_schedule_timeout);
 
 /**
  * sys_sched_get_priority_max - return maximum RT priority.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to