From: jun qian <qianjun.ker...@gmail.com>

Allow terminating the softirq processing loop without finishing the vectors.

Signed-off-by: jun qian <qianjun.ker...@gmail.com>
---
 kernel/softirq.c | 113 ++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 91 insertions(+), 22 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index cbb59b5..29cf079 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -254,6 +254,22 @@ static inline bool __softirq_needs_break(u64 start)
        return false;
 }
 
+#define SOFTIRQ_PENDING_MASK ((1UL << NR_SOFTIRQS) - 1)
+
+/*
+ * pending_next_bit records the vector at which processing resumes after
+ * the loop has been broken early. This per-CPU variable addresses the
+ * following scenario:
+ * Assume bits 0 and 1 are pending when processing starts. The loop
+ * breaks out after bit 0 has been handled and stores bit 1 back as
+ * pending. Before ksoftirqd runs, bit 0 gets raised again. ksoftirqd
+ * runs and handles bit 0, which takes longer than the timeout. As a
+ * result, bit 0 processing can starve all other softirqs.
+ *
+ * So pending_next_bit is needed to record where processing resumes.
+ */
+DEFINE_PER_CPU(u32, pending_next_bit);
+
 asmlinkage __visible void __softirq_entry __do_softirq(void)
 {
        u64 start = sched_clock();
@@ -261,8 +277,11 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
        unsigned int max_restart = MAX_SOFTIRQ_RESTART;
        struct softirq_action *h;
        unsigned long pending;
+       unsigned long pending_left, pending_again;
        unsigned int vec_nr;
        bool in_hardirq;
+       int next_bit;
+       unsigned long flags;
 
        /*
         * Mask out PF_MEMALLOC as the current task context is borrowed for the
@@ -283,25 +302,66 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 
        local_irq_enable();
 
-       for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
-               int prev_count;
-
-               __clear_bit(vec_nr, &pending);
-
-               h = softirq_vec + vec_nr;
-
-               prev_count = preempt_count();
-
-               kstat_incr_softirqs_this_cpu(vec_nr);
+       /*
+        * pending_left holds the bits left unhandled when the loop was
+        * broken without finishing the vectors. These bits are handled
+        * first the next time around. pending_again holds the bits below
+        * pending_next_bit, i.e. bits raised in the meantime. These bits
+        * are handled after the pending_left bits have been handled.
+        *
+        * For example:
+        * If the pending bits are 1101010110 and the loop is broken after
+        * bit 4 is handled, then pending_next_bit will be 5,
+        * pending_left is 1101000000 and pending_again is 000000110.
+        */
+       next_bit = __this_cpu_read(pending_next_bit);
+       pending_left = pending &
+               (SOFTIRQ_PENDING_MASK << next_bit);
+       pending_again = pending &
+               (SOFTIRQ_PENDING_MASK >> (NR_SOFTIRQS - next_bit));
+
+       while (pending_left || pending_again) {
+               if  (pending_left) {
+                       pending = pending_left;
+                       pending_left = 0;
+               } else if (pending_again) {
+                       pending = pending_again;
+                       pending_again = 0;
+               } else
+                       break;
+               for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
+                       int prev_count;
+
+                       __clear_bit(vec_nr, &pending);
+
+                       h = softirq_vec + vec_nr;
+
+                       prev_count = preempt_count();
+
+                       kstat_incr_softirqs_this_cpu(vec_nr);
+
+                       trace_softirq_entry(vec_nr);
+                       h->action(h);
+                       trace_softirq_exit(vec_nr);
+                       if (unlikely(prev_count != preempt_count())) {
+                               pr_err("huh, entered softirq %u %s %p with 
preempt_count %08x, exited with %08x?\n",
+                                      vec_nr, softirq_to_name[vec_nr], 
h->action,
+                                      prev_count, preempt_count());
+                               preempt_count_set(prev_count);
+                       }
 
-               trace_softirq_entry(vec_nr);
-               h->action(h);
-               trace_softirq_exit(vec_nr);
-               if (unlikely(prev_count != preempt_count())) {
-                       pr_err("huh, entered softirq %u %s %p with 
preempt_count %08x, exited with %08x?\n",
-                              vec_nr, softirq_to_name[vec_nr], h->action,
-                              prev_count, preempt_count());
-                       preempt_count_set(prev_count);
+                       /* Allow early break to avoid big sched delay */
+                       if (pending && __softirq_needs_break(start)) {
+                               __this_cpu_write(pending_next_bit, vec_nr + 1);
+                               /*
+                                * Ensure that the remaining pending bits will be
+                                * handled the next time.
+                                */
+                               local_irq_save(flags);
+                               or_softirq_pending(pending | pending_again);
+                               local_irq_restore(flags);
+                               break;
+                       }
                }
        }
 
@@ -309,12 +369,21 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
                rcu_softirq_qs();
        local_irq_disable();
 
-       pending = local_softirq_pending();
-       if (pending) {
-               if (!__softirq_needs_break(start) && --max_restart)
-                       goto restart;
+       /* get the unhandled bits */
+       pending |= pending_again;
+       if (!pending)
+               /*
+                * If all of the pending bits have been handled,
+                * reset the pending_next_bit to 0.
+                */
+               __this_cpu_write(pending_next_bit, 0);
 
+       if (pending)
                wakeup_softirqd();
+       else if (!__softirq_needs_break(start) && --max_restart) {
+               pending = local_softirq_pending();
+               if (pending)
+                       goto restart;
        }
 
        lockdep_softirq_end(in_hardirq);
-- 
1.8.3.1

Reply via email to