Hi Hillf, Thanks for the patch. I just tried it and it looks better than the previous one. The issue appeared only once in ~30 minutes of stress testing (without the patch, it usually shows up within 1 minute, so I feel we are getting close to the final fix). (I have pasted the modifications from my tree below in case anything is missing.)
--- ./include/net/sch_generic.h.orig 2020-08-21 15:13:51.787952710 +0800 +++ ./include/net/sch_generic.h 2020-08-26 09:41:04.647173869 +0800 @@ -79,6 +79,7 @@ #define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump */ #define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */ #define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */ + int pkt_seq; u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; --- ./include/net/pkt_sched.h.orig 2020-08-21 15:13:51.787952710 +0800 +++ ./include/net/pkt_sched.h 2020-08-26 09:42:14.491377514 +0800 @@ -117,8 +117,15 @@ static inline void qdisc_run(struct Qdisc *q) { if (qdisc_run_begin(q)) { + q->pkt_seq = 0; + __qdisc_run(q); qdisc_run_end(q); + + /* reschedule qdisc if there are packets enqueued */ + if (q->pkt_seq != 0) + __netif_schedule(q); + } } --- ./net/core/dev.c.orig 2020-03-19 16:31:27.000000000 +0800 +++ ./net/core/dev.c 2020-08-26 09:47:57.783165885 +0800 @@ -2721,6 +2721,7 @@ local_irq_save(flags); sd = this_cpu_ptr(&softnet_data); + q->pkt_seq = 0; q->next_sched = NULL; *sd->output_queue_tailp = q; sd->output_queue_tailp = &q->next_sched; --- ./net/sched/sch_generic.c.orig 2020-08-24 22:02:04.589830751 +0800 +++ ./net/sched/sch_generic.c 2020-08-26 09:43:40.987852551 +0800 @@ -403,6 +403,9 @@ */ quota -= packets; if (quota <= 0 || need_resched()) { + /* info caller to reschedule qdisc outside q->seqlock */ + q->pkt_seq = 1; + __netif_schedule(q); break; } Hillf Danton <hdan...@sina.com> 于2020年8月26日周三 上午12:26写道: > > > Hi Feng, > > On Tue, 25 Aug 2020 15:14:12 +0800 Fengkehuan Feng wrote: > > Hi Hillf, > > > > I just tried the updated version and the system can boot up now. > > Thanks again for your testing. > > > It does mitigate the issue a lot but still couldn't get rid of it > > thoroughly. It seems to me like the effect of Cong's patch. 
> > Your echoes show we're still march in the dark and let's try another > direction in which qdisc is rescheduled outside seqlock to make sure > tx softirq is raised when there're more packets on the pfifo_fast to > be transmitted. > > CPU0 CPU1 > ---- ---- > seqlock > test __QDISC_STATE_SCHED > raise tx softirq > clear __QDISC_STATE_SCHED > try seqlock > __qdisc_run(q); > sequnlock > sequnlock > > > --- a/include/net/sch_generic.h > +++ b/include/net/sch_generic.h > @@ -79,6 +79,7 @@ struct Qdisc { > #define TCQ_F_INVISIBLE 0x80 /* invisible by default in dump > */ > #define TCQ_F_NOLOCK 0x100 /* qdisc does not require locking */ > #define TCQ_F_OFFLOADED 0x200 /* qdisc is offloaded to HW */ > + int pkt_seq; > u32 limit; > const struct Qdisc_ops *ops; > struct qdisc_size_table __rcu *stab; > --- a/include/net/pkt_sched.h > +++ b/include/net/pkt_sched.h > @@ -118,6 +118,8 @@ void __qdisc_run(struct Qdisc *q); > static inline void qdisc_run(struct Qdisc *q) > { > if (qdisc_run_begin(q)) { > + q->pkt_seq = 0; > + > /* NOLOCK qdisc must check 'state' under the qdisc seqlock > * to avoid racing with dev_qdisc_reset() > */ > @@ -125,6 +127,10 @@ static inline void qdisc_run(struct Qdis > likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) > __qdisc_run(q); > qdisc_run_end(q); > + > + /* reschedule qdisc if there are packets enqueued */ > + if (q->pkt_seq != 0) > + __netif_schedule(q); > } > } > > --- a/net/sched/sch_generic.c > +++ b/net/sched/sch_generic.c > @@ -384,6 +384,8 @@ void __qdisc_run(struct Qdisc *q) > while (qdisc_restart(q, &packets)) { > quota -= packets; > if (quota <= 0) { > + /* info caller to reschedule qdisc outside q->seqlock > */ > + q->pkt_seq = 1; > __netif_schedule(q); > break; > } > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -3031,6 +3031,7 @@ static void __netif_reschedule(struct Qd > > local_irq_save(flags); > sd = this_cpu_ptr(&softnet_data); > + q->pkt_seq = 0; > q->next_sched = NULL; > *sd->output_queue_tailp = q; > 
sd->output_queue_tailp = &q->next_sched; > -- >