Commit 745e20f1b626 ("net: add a recursion limit in xmit path")
introduced a recursion limit, but it only applies to devices without a
queue. Virtual devices with a queue (either because they don't have
the IFF_NO_QUEUE flag, or because the administrator added one) can
still cause an unbounded recursion, via __dev_queue_xmit ->
__dev_xmit_skb -> qdisc_run -> __qdisc_run -> qdisc_restart ->
sch_direct_xmit -> dev_hard_start_xmit. Jianlin reported this in a
setup with 16 gretap devices stacked on top of one another.
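
For context, the guard added by 745e20f1b626 only wraps the direct
dev_hard_start_xmit() call taken when no qdisc is attached; roughly
(paraphrased from __dev_queue_xmit() in net/core/dev.c, exact code
varies between trees):

        /* existing guard, IFF_NO_QUEUE path only; nothing below
         * __dev_xmit_skb() touches the counter */
        if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT))
                goto recursion_alert;

        HARD_TX_LOCK(dev, txq, cpu);
        if (!netif_xmit_stopped(txq)) {
                __this_cpu_inc(xmit_recursion);
                skb = dev_hard_start_xmit(skb, dev, txq, &rc);
                __this_cpu_dec(xmit_recursion);
        }
        HARD_TX_UNLOCK(dev, txq);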

This patch prevents the stack overflow by incrementing xmit_recursion in
code paths that can call dev_hard_start_xmit() (like commit 745e20f1b626
did). If the recursion limit is exceeded, the packet is enqueued and the
qdisc is scheduled.
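
The counter and limit are the ones 745e20f1b626 introduced (as
RECURSION_LIMIT, since moved into include/linux/netdevice.h), so no
new state is added; for reference, roughly:

        /* assumed to already be declared in include/linux/netdevice.h */
        DECLARE_PER_CPU(int, xmit_recursion);
        #define XMIT_RECURSION_LIMIT    10      /* exact value may differ by tree */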

Reported-by: Jianlin Shi <ji...@redhat.com>
Signed-off-by: Sabrina Dubroca <s...@queasysnail.net>
Reviewed-by: Stefano Brivio <sbri...@redhat.com>
---
No Fixes tag; I think this is at least as old as the issue that
745e20f1b626 ("net: add a recursion limit in xmit path") fixed.

 include/net/pkt_sched.h | 14 ++++++++++++++
 net/core/dev.c          |  4 +++-
 net/sched/sch_generic.c |  7 +++++++
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index a16fbe9a2a67..9384c1749bf3 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -113,6 +113,20 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
                     struct net_device *dev, struct netdev_queue *txq,
                     spinlock_t *root_lock, bool validate);
 
+static inline bool sch_direct_xmit_rec(struct sk_buff *skb, struct Qdisc *q,
+                                      struct net_device *dev,
+                                      struct netdev_queue *txq,
+                                      spinlock_t *root_lock, bool validate)
+{
+       bool ret;
+
+       __this_cpu_inc(xmit_recursion);
+       ret = sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
+       __this_cpu_dec(xmit_recursion);
+
+       return ret;
+}
+
 void __qdisc_run(struct Qdisc *q);
 
 static inline void qdisc_run(struct Qdisc *q)
diff --git a/net/core/dev.c b/net/core/dev.c
index 2b67f2aa59dd..74b77a773712 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3493,6 +3493,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
                __qdisc_drop(skb, &to_free);
                rc = NET_XMIT_DROP;
        } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
+                  likely(__this_cpu_read(xmit_recursion) <=
+                         XMIT_RECURSION_LIMIT) &&
                   qdisc_run_begin(q)) {
                /*
                 * This is a work-conserving queue; there are no old skbs
@@ -3502,7 +3504,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 
                qdisc_bstats_update(q, skb);
 
-               if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
+               if (sch_direct_xmit_rec(skb, q, dev, txq, root_lock, true)) {
                        if (unlikely(contended)) {
                                spin_unlock(&q->busylock);
                                contended = false;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a117d9260558..2565ef18d4cc 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -395,6 +395,12 @@ void __qdisc_run(struct Qdisc *q)
        int quota = dev_tx_weight;
        int packets;
 
+       if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
+               __netif_schedule(q);
+               return;
+       }
+
+       __this_cpu_inc(xmit_recursion);
        while (qdisc_restart(q, &packets)) {
                /*
                 * Ordered by possible occurrence: Postpone processing if
@@ -407,6 +413,7 @@ void __qdisc_run(struct Qdisc *q)
                        break;
                }
        }
+       __this_cpu_dec(xmit_recursion);
 }
 
 unsigned long dev_trans_start(struct net_device *dev)
-- 
2.21.0
