Commit 745e20f1b626 ("net: add a recursion limit in xmit path") introduced a recursion limit, but it only applies to devices without a queue. Virtual devices with a queue (either because they lack the IFF_NO_QUEUE flag, or because the administrator added one) can still cause unbounded recursion, via __dev_queue_xmit -> __dev_xmit_skb -> qdisc_run -> __qdisc_run -> qdisc_restart -> sch_direct_xmit -> dev_hard_start_xmit. Jianlin reported this in a setup with 16 gretap devices stacked on top of one another.
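For illustration, here is a minimal user-space sketch of that recursion shape (toy types and names, not kernel code): each stacked device's transmit path calls straight into the device below it, so the nesting depth grows with the number of stacked devices and nothing in the path bounds it.

/* Toy model of stacked virtual devices transmitting into each other. */
#include <stdio.h>

#define NUM_STACKED_DEVS 16	/* as in the reported gretap setup */

struct toy_dev {
	const char *name;
	struct toy_dev *lower;	/* the device this one is stacked on */
};

static int depth;

/* stands in for __dev_queue_xmit -> ... -> dev_hard_start_xmit */
static void toy_xmit(struct toy_dev *dev)
{
	depth++;
	printf("depth %2d: xmit on %s\n", depth, dev->name);
	if (dev->lower)
		toy_xmit(dev->lower);	/* recurses with no limit applied */
	depth--;
}

int main(void)
{
	struct toy_dev devs[NUM_STACKED_DEVS];
	char names[NUM_STACKED_DEVS][16];
	int i;

	for (i = 0; i < NUM_STACKED_DEVS; i++) {
		snprintf(names[i], sizeof(names[i]), "gretap%d", i);
		devs[i].name = names[i];
		devs[i].lower = i ? &devs[i - 1] : NULL;
	}

	toy_xmit(&devs[NUM_STACKED_DEVS - 1]);
	return 0;
}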
This patch prevents the stack overflow by incrementing xmit_recursion in code paths that can call dev_hard_start_xmit() (like commit 745e20f1b626 did). If the recursion limit is exceeded, the packet is enqueued and the qdisc is scheduled.

Reported-by: Jianlin Shi <ji...@redhat.com>
Signed-off-by: Sabrina Dubroca <s...@queasysnail.net>
Reviewed-by: Stefano Brivio <sbri...@redhat.com>
---
No Fixes tag; I think this is at least as old as what 745e20f1b626
("net: add a recursion limit in xmit path") fixed.

 include/net/pkt_sched.h | 14 ++++++++++++++
 net/core/dev.c          |  4 +++-
 net/sched/sch_generic.c |  7 +++++++
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index a16fbe9a2a67..9384c1749bf3 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -113,6 +113,20 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		     struct net_device *dev, struct netdev_queue *txq,
 		     spinlock_t *root_lock, bool validate);
 
+static inline bool sch_direct_xmit_rec(struct sk_buff *skb, struct Qdisc *q,
+				       struct net_device *dev,
+				       struct netdev_queue *txq,
+				       spinlock_t *root_lock, bool validate)
+{
+	bool ret;
+
+	__this_cpu_inc(xmit_recursion);
+	ret = sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
+	__this_cpu_dec(xmit_recursion);
+
+	return ret;
+}
+
 void __qdisc_run(struct Qdisc *q);
 
 static inline void qdisc_run(struct Qdisc *q)
diff --git a/net/core/dev.c b/net/core/dev.c
index 2b67f2aa59dd..74b77a773712 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3493,6 +3493,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		__qdisc_drop(skb, &to_free);
 		rc = NET_XMIT_DROP;
 	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
+		   likely(__this_cpu_read(xmit_recursion) <=
+			  XMIT_RECURSION_LIMIT) &&
 		   qdisc_run_begin(q)) {
 		/*
 		 * This is a work-conserving queue; there are no old skbs
@@ -3502,7 +3504,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 
 		qdisc_bstats_update(q, skb);
 
-		if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
+		if (sch_direct_xmit_rec(skb, q, dev, txq, root_lock, true)) {
 			if (unlikely(contended)) {
 				spin_unlock(&q->busylock);
 				contended = false;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a117d9260558..2565ef18d4cc 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -395,6 +395,12 @@ void __qdisc_run(struct Qdisc *q)
 	int quota = dev_tx_weight;
 	int packets;
 
+	if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
+		__netif_schedule(q);
+		return;
+	}
+
+	__this_cpu_inc(xmit_recursion);
 	while (qdisc_restart(q, &packets)) {
 		/*
 		 * Ordered by possible occurrence: Postpone processing if
@@ -407,6 +413,7 @@ void __qdisc_run(struct Qdisc *q)
 			break;
 		}
 	}
+	__this_cpu_dec(xmit_recursion);
 }
 
 unsigned long dev_trans_start(struct net_device *dev)
-- 
2.21.0
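For reference, a stand-alone user-space sketch of the guard this patch applies (all names below are illustrative, not kernel symbols): a per-thread counter stands in for the per-CPU xmit_recursion, the direct-transmit step is wrapped in an increment/decrement pair, and once the limit is exceeded the work is deferred instead of nesting further, as __netif_schedule() does for the real qdisc.

/* Toy model of the recursion guard: bounded nesting plus deferral. */
#include <stdio.h>

#define TOY_RECURSION_LIMIT 8

static __thread int toy_xmit_recursion;	/* stands in for per-CPU xmit_recursion */

/* stands in for sch_direct_xmit(): may re-enter toy_qdisc_run() below */
static void toy_direct_xmit(int remaining_layers);

/* stands in for __qdisc_run() with the limit check added by the patch */
static void toy_qdisc_run(int remaining_layers)
{
	if (toy_xmit_recursion > TOY_RECURSION_LIMIT) {
		/* the real code calls __netif_schedule(q) here */
		printf("limit hit at depth %d: deferring remaining %d layers\n",
		       toy_xmit_recursion, remaining_layers);
		return;
	}

	toy_xmit_recursion++;
	toy_direct_xmit(remaining_layers);
	toy_xmit_recursion--;
}

static void toy_direct_xmit(int remaining_layers)
{
	printf("depth %2d: transmitting\n", toy_xmit_recursion);
	if (remaining_layers > 0)
		toy_qdisc_run(remaining_layers - 1);	/* stacked device below */
}

int main(void)
{
	toy_qdisc_run(16);	/* 16 stacked layers; nesting stays bounded */
	return 0;
}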