From: jun qian <qianjun.ker...@gmail.com>

Allow terminating the softirq processing loop without finishing the
vectors.
Signed-off-by: jun qian <qianjun.ker...@gmail.com>
---
 kernel/softirq.c | 113 ++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 91 insertions(+), 22 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index cbb59b5..29cf079 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -254,6 +254,22 @@ static inline bool __softirq_needs_break(u64 start)
 	return false;
 }
 
+#define SOFTIRQ_PENDING_MASK ((1UL << NR_SOFTIRQS) - 1)
+
+/*
+ * pending_next_bit records where processing has to resume when the
+ * loop is broken early. This per-CPU variable solves the following
+ * scenario:
+ * Assume bit 0 and 1 are pending when the processing starts. Now it
+ * breaks out after bit 0 has been handled and stores back bit 1 as
+ * pending. Before ksoftirqd runs bit 0 gets raised again. ksoftirqd
+ * runs and handles bit 0, which takes more than the timeout. As a
+ * result the bit 0 processing can starve all other softirqs.
+ *
+ * So pending_next_bit is needed to record where to resume processing.
+ */
+DEFINE_PER_CPU(u32, pending_next_bit);
+
 asmlinkage __visible void __softirq_entry __do_softirq(void)
 {
 	u64 start = sched_clock();
@@ -261,8 +277,11 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 	unsigned int max_restart = MAX_SOFTIRQ_RESTART;
 	struct softirq_action *h;
 	unsigned long pending;
+	unsigned long pending_left, pending_again;
 	unsigned int vec_nr;
 	bool in_hardirq;
+	int next_bit;
+	unsigned long flags;
 
 	/*
 	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
@@ -283,25 +302,66 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 
 	local_irq_enable();
 
-	for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
-		int prev_count;
-
-		__clear_bit(vec_nr, &pending);
-
-		h = softirq_vec + vec_nr;
-
-		prev_count = preempt_count();
-
-		kstat_incr_softirqs_this_cpu(vec_nr);
+	/*
+	 * pending_left contains the bits left unhandled when the loop
+	 * was broken before finishing the vectors; they are handled
+	 * first on this run. pending_again contains the bits raised
+	 * again in the meantime; they are handled afterwards.
+	 *
+	 * For example:
+	 * The previous run saw pending = 1101010110 and broke after
+	 * bit 4, so pending_next_bit is 5 and 1101000000 was stored
+	 * back. If bits 1 and 2 are then raised again, this run splits
+	 * pending into pending_left 1101000000, pending_again 0000000110.
+	 */
+	next_bit = __this_cpu_read(pending_next_bit);
+	pending_left = pending &
+		(SOFTIRQ_PENDING_MASK << next_bit);
+	pending_again = pending &
+		(SOFTIRQ_PENDING_MASK >> (NR_SOFTIRQS - next_bit));
+
+	while (pending_left || pending_again) {
+		if (pending_left) {
+			pending = pending_left;
+			pending_left = 0;
+		} else if (pending_again) {
+			pending = pending_again;
+			pending_again = 0;
+		} else
+			break;
+		for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
+			int prev_count;
+
+			__clear_bit(vec_nr, &pending);
+
+			h = softirq_vec + vec_nr;
+
+			prev_count = preempt_count();
+
+			kstat_incr_softirqs_this_cpu(vec_nr);
+
+			trace_softirq_entry(vec_nr);
+			h->action(h);
+			trace_softirq_exit(vec_nr);
+			if (unlikely(prev_count != preempt_count())) {
+				pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
+				       vec_nr, softirq_to_name[vec_nr], h->action,
+				       prev_count, preempt_count());
+				preempt_count_set(prev_count);
+			}
 
-		trace_softirq_entry(vec_nr);
-		h->action(h);
-		trace_softirq_exit(vec_nr);
-		if (unlikely(prev_count != preempt_count())) {
-			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
-			       vec_nr, softirq_to_name[vec_nr], h->action,
-			       prev_count, preempt_count());
-			preempt_count_set(prev_count);
+			/* Allow early break to avoid big sched delay */
+			if (pending && __softirq_needs_break(start)) {
+				__this_cpu_write(pending_next_bit, vec_nr + 1);
+				/*
+				 * Ensure that the remaining pending bits
+				 * are handled on the next run.
+				 */
+				local_irq_save(flags);
+				or_softirq_pending(pending | pending_again);
+				local_irq_restore(flags);
+				break;
+			}
 		}
 	}
 
@@ -309,12 +369,21 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 		rcu_softirq_qs();
 
 	local_irq_disable();
 
-	pending = local_softirq_pending();
-	if (pending) {
-		if (!__softirq_needs_break(start) && --max_restart)
-			goto restart;
+	/* get the unhandled bits */
+	pending |= pending_again;
+	if (!pending)
+		/*
+		 * All pending bits have been handled, so reset
+		 * pending_next_bit to 0.
+		 */
+		__this_cpu_write(pending_next_bit, 0);
+	if (pending)
 		wakeup_softirqd();
+	else if (!__softirq_needs_break(start) && --max_restart) {
+		pending = local_softirq_pending();
+		if (pending)
+			goto restart;
 	}
 
 	lockdep_softirq_end(in_hardirq);
--
1.8.3.1
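
To see the pending_left/pending_again split in isolation, here is a
minimal standalone userspace sketch of the mask arithmetic. The macro
names and the bit patterns mirror the comment in __do_softirq(); the
main() wrapper, the chosen pending value and the printf output are
illustration only and are not part of the patch:

/*
 * Userspace sketch of the split done at the top of __do_softirq().
 * Assumes NR_SOFTIRQS = 10 as in the kernel.
 */
#include <stdio.h>

#define NR_SOFTIRQS		10
#define SOFTIRQ_PENDING_MASK	((1UL << NR_SOFTIRQS) - 1)

int main(void)
{
	/* Bits 1, 2, 6, 8 and 9 pending: 1101000110 in binary. */
	unsigned long pending = 0x346;
	/* The previous run broke after handling bit 4. */
	int next_bit = 5;

	/* Bits >= next_bit were left over from the interrupted run. */
	unsigned long pending_left =
		pending & (SOFTIRQ_PENDING_MASK << next_bit);
	/* Bits < next_bit were raised again in the meantime. */
	unsigned long pending_again =
		pending & (SOFTIRQ_PENDING_MASK >> (NR_SOFTIRQS - next_bit));

	printf("pending       = 0x%03lx\n", pending);
	printf("pending_left  = 0x%03lx\n", pending_left);	/* 0x340 */
	printf("pending_again = 0x%03lx\n", pending_again);	/* 0x006 */
	return 0;
}

With next_bit = 5, the left shift keeps bits 5..9 (the leftover
vectors) and the right shift keeps bits 0..4 (the re-raised vectors),
so the interrupted vectors are always drained before any vector that
was raised again gets a second turn.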