On Tue, 2016-05-10 at 14:53 -0700, Eric Dumazet wrote:
> On Tue, 2016-05-10 at 17:35 -0400, Rik van Riel wrote:
> 
> > You might need another one of these in invoke_softirq()
> > 
> 
> Excellent.
> 
> I gave it a quick try (without your suggestion), and host seems to
> survive a stress test.

Well, we instantly trigger RCU stalls.

How to reproduce:

netserver &
for i in `seq 1 100`
do
  netperf -H 127.0.0.1 -t TCP_RR -l 1000 &
done
# Local hack to enable the new behavior without adding a new sysctl:
# the patch below keys off the low bit of an existing one
# (netdev_max_backlog), so writing an odd value turns it on.
echo 1001 >/proc/sys/net/core/netdev_max_backlog

<bang :>



[  236.977511] INFO: rcu_sched self-detected stall on CPU
[  236.977512] INFO: rcu_sched self-detected stall on CPU
[  236.977515] INFO: rcu_sched self-detected stall on CPU
[  236.977518] INFO: rcu_sched self-detected stall on CPU
[  236.977519] INFO: rcu_sched self-detected stall on CPU
[  236.977521] INFO: rcu_sched self-detected stall on CPU
[  236.977522] INFO: rcu_sched self-detected stall on CPU
[  236.977523] INFO: rcu_sched self-detected stall on CPU
[  236.977525] INFO: rcu_sched self-detected stall on CPU
[  236.977526] INFO: rcu_sched self-detected stall on CPU
[  236.977527] INFO: rcu_sched self-detected stall on CPU
[  236.977529] INFO: rcu_sched self-detected stall on CPU
[  236.977530] INFO: rcu_sched self-detected stall on CPU
[  236.977532] INFO: rcu_sched self-detected stall on CPU
[  236.977532]  47-...: (1 GPs behind) idle=8d1/1/0 softirq=2500/2506 fqs=1 
[  236.977535] INFO: rcu_sched self-detected stall on CPU
[  236.977536] INFO: rcu_sched self-detected stall on CPU
[  236.977540]  36-...: (1 GPs behind) idle=d05/1/0 softirq=2637/2644 fqs=1 
[  236.977546]  
[  236.977546]  38-...: (1 GPs behind) idle=a5b/1/0 softirq=2612/2618 fqs=1 
[  236.977549]  0-...: (1 GPs behind) idle=c39/1/0 softirq=15315/15321 fqs=1 
[  236.977551]  24-...: (1 GPs behind) idle=ea3/1/0 softirq=2455/2461 fqs=1 
[  236.977554]  18-...: (20995 ticks this GP) idle=ef5/1/0 softirq=8530/8530 fqs=1 
[  236.977556]  39-...: (1 GPs behind) idle=f9d/1/0 softirq=2144/2150 fqs=1 
[  236.977558]  
[  236.977558]  22-...: (1 GPs behind) idle=5a7/1/0 softirq=10238/10244 fqs=1 
[  236.977561]  7-...: (1 GPs behind) idle=323/1/0 softirq=5279/5285 fqs=1 
[  236.977563]  31-...: (1 GPs behind) idle=47d/1/0 softirq=2526/2532 fqs=1 
[  236.977565]  33-...: (1 GPs behind) idle=175/1/0 softirq=2060/2066 fqs=1 
[  236.977568]  10-...: (1 GPs behind) idle=c3d/1/0 softirq=4864/4870 fqs=1 
[  236.977570]  34-...: (20995 ticks this GP) idle=dd5/1/0 softirq=2243/2243 fqs=1 
[  236.977574]  
[  236.977574]  37-...: (1 GPs behind) idle=aef/1/0 softirq=2660/2666 fqs=1 
[  236.977576]  13-...: (1 GPs behind) idle=a2b/1/0 softirq=9928/9934 fqs=1 
[  236.977578]  
[  236.977578]  
[  236.977579]  
[  236.977580]  
[  236.977582]  
[  236.977583]  
[  236.977583]  
[  236.977584]  
[  236.977584]  
[  236.977586]  
[  236.977587] rcu_sched kthread starved for 20997 jiffies! g33049 c33048 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1
[  236.977588]  
[  236.977589]  
[  236.977595] rcu_sched kthread starved for 20997 jiffies! g33049 c33048 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1
[  236.977603] rcu_sched kthread starved for 20997 jiffies! g33049 c33048 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1
[  236.977607] rcu_sched kthread starved for 20997 jiffies! g33049 c33048 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1
[  236.977609] rcu_sched kthread starved for 20997 jiffies! g33049 c33048 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1
[  236.977610] rcu_sched kthread starved for 20997 jiffies! g33049 c33048 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1
[  236.977612] rcu_sched kthread starved for 20997 jiffies! g33049 c33048 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1
[  236.977614] rcu_sched kthread starved for 20997 jiffies! g33049 c33048 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1
[  236.977616] rcu_sched kthread starved for 20997 jiffies! g33049 c33048 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1
[  236.977618] rcu_sched kthread starved for 20997 jiffies! g33049 c33048 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1
[  236.977619] rcu_sched kthread starved for 20997 jiffies! g33049 c33048 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1
[  236.977620] rcu_sched kthread starved for 20997 jiffies! g33049 c33048 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1
[  236.977622] rcu_sched kthread starved for 20997 jiffies! g33049 c33048 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1
[  236.977626] rcu_sched kthread starved for 20997 jiffies! g33049 c33048 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1
[  236.977627] rcu_sched kthread starved for 20997 jiffies! g33049 c33048 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x1
[  236.978512] INFO: rcu_sched self-detected stall on CPU
[  236.978512] INFO: rcu_sched self-detected stall on CPU
[  236.978514] INFO: rcu_sched self-detected stall on CPU
[  236.978516] INFO: rcu_sched self-detected stall on CPU
[  236.978517] INFO: rcu_sched self-detected stall on CPU
[  236.978518] INFO: rcu_sched self-detected stall on CPU
[  236.978519] INFO: rcu_sched self-detected stall on CPU
[  236.978520] INFO: rcu_sched self-detected stall on CPU
[  236.978521] INFO: rcu_sched self-detected stall on CPU
[  236.978522] INFO: rcu_sched self-detected stall on CPU
[  236.978523] INFO: rcu_sched self-detected stall on CPU
[  236.978524] INFO: rcu_sched self-detected stall on CPU
[  236.978532]  45-...: (1 GPs behind) idle=8ed/1/0 softirq=3047/3053 fqs=1 
[  236.978534]  19-...: (20996 ticks this GP) idle=b5d/1/0 softirq=8157/8157 fqs=1 
[  236.978538]  17-...: (1 GPs behind) idle=5ad/1/0 softirq=7839/7845 fqs=1 
[  236.978539]  41-...: (1 GPs behind) idle=f4f/1/0 softirq=2345/2351 fqs=1 
[  236.978542]  6-...: (1 GPs behind) idle=a39/1/0 softirq=5492/5498 fqs=1 
[  236.978544]  30-...: (1 GPs behind) idle=c51/1/0 softirq=2499/2505 fqs=1 
[  236.978546]  5-...: (1 GPs behind) idle=917/1/0 softirq=5196/5202 fqs=1 
[  236.978548]  26-...: (20996 ticks this GP) idle=c61/1/0 softirq=2863/2863 fqs=1 
[  236.978550]  32-...: (1 GPs behind) idle=8db/1/0 softirq=2588/2594 fqs=1 
[  236.978552]  35-...: (1 GPs behind) idle=351/1/0 softirq=1869/1875 fqs=1 
[  236.978554]  8-...: (1 GPs behind) idle=221/1/0 softirq=5192/5198 fqs=1 
[  236.978556]  11-...: (1 GPs behind) idle=485/1/0 softirq=4480/4486 fqs=1 
[  236.978557]  
[  236.978558]  
[  236.978559]  
[  236.978560]  
[  236.978561]  


Tentative prototype patch (not including Peter's suggestions yet):

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 17caf4b63342..be94e0241a70 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -56,6 +56,14 @@ EXPORT_SYMBOL(irq_stat);
 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
 
 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+DEFINE_PER_CPU(bool, ksoftirqd_scheduled);
+
+static inline bool ksoftirqd_running(void)
+{
+       extern int netdev_max_backlog; /* temp hack */
+
+       return (netdev_max_backlog & 1) && __this_cpu_read(ksoftirqd_scheduled);
+}
 
 const char * const softirq_to_name[NR_SOFTIRQS] = {
        "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
@@ -73,8 +81,10 @@ static void wakeup_softirqd(void)
        /* Interrupts are disabled: no need to stop preemption */
        struct task_struct *tsk = __this_cpu_read(ksoftirqd);
 
-       if (tsk && tsk->state != TASK_RUNNING)
+       if (tsk && tsk->state != TASK_RUNNING) {
+               __this_cpu_write(ksoftirqd_scheduled, true);
                wake_up_process(tsk);
+       }
 }
 
 /*
@@ -313,7 +323,7 @@ asmlinkage __visible void do_softirq(void)
 
        pending = local_softirq_pending();
 
-       if (pending)
+       if (pending && !ksoftirqd_running())
                do_softirq_own_stack();
 
        local_irq_restore(flags);
@@ -340,6 +350,9 @@ void irq_enter(void)
 
 static inline void invoke_softirq(void)
 {
+       if (ksoftirqd_running())
+               return;
+
        if (!force_irqthreads) {
 #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
                /*
@@ -660,6 +673,8 @@ static void run_ksoftirqd(unsigned int cpu)
                 * in the task stack here.
                 */
                __do_softirq();
+               if (!local_softirq_pending())
+                       __this_cpu_write(ksoftirqd_scheduled, false);
                local_irq_enable();
                cond_resched_rcu_qs();
                return;



Reply via email to