From: Qingfang DENG <qingfang.d...@siflower.com.cn>

br_offload_port_state is called inside a spinlock's critical section
(&br->lock), but rhashtable_init/rhashtable_free_and_destroy/flush_work
may sleep, causing a scheduling-while-atomic bug.

To fix this, use workqueues to defer the init/destroy operations. To
synchronize rhashtable insertion against destruction, synchronize_rcu is
used to ensure all concurrent inserters have left their RCU read-side
critical sections before calling rhashtable_free_and_destroy.

While at it, check for null pointers at the flow allocation.

Fixes: 94b4da9b4aad ("kernel: add a fast path for the bridge code")
Signed-off-by: Qingfang DENG <qingfang.d...@siflower.com.cn>
---
v3: fix race conditions and optimize code

 .../hack-5.15/600-bridge_offload.patch        | 97 +++++++++++++------
 1 file changed, 69 insertions(+), 28 deletions(-)

diff --git a/target/linux/generic/hack-5.15/600-bridge_offload.patch 
b/target/linux/generic/hack-5.15/600-bridge_offload.patch
index 9d71a741b2..87396611bd 100644
--- a/target/linux/generic/hack-5.15/600-bridge_offload.patch
+++ b/target/linux/generic/hack-5.15/600-bridge_offload.patch
@@ -150,16 +150,25 @@ Subject: [PATCH] net/bridge: add bridge offload
  
  /*
   * Determine initial path cost based on speed.
-@@ -428,7 +429,7 @@ static struct net_bridge_port *new_nbp(s
+@@ -362,6 +363,7 @@ static void del_nbp(struct net_bridge_po
+       kobject_del(&p->kobj);
+ 
+       br_netpoll_disable(p);
++      flush_work(&p->offload.deferred_work);
+ 
+       call_rcu(&p->rcu, destroy_nbp_rcu);
+ }
+@@ -428,7 +430,8 @@ static struct net_bridge_port *new_nbp(s
        p->path_cost = port_cost(dev);
        p->priority = 0x8000 >> BR_PORT_BITS;
        p->port_no = index;
 -      p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
 +      p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD | 
BR_OFFLOAD;
++      br_offload_init_work(p);
        br_init_port(p);
        br_set_state(p, BR_STATE_DISABLED);
        br_stp_port_timer_init(p);
-@@ -771,6 +772,9 @@ void br_port_flags_change(struct net_bri
+@@ -771,6 +774,9 @@ void br_port_flags_change(struct net_bri
  
        if (mask & BR_NEIGH_SUPPRESS)
                br_recalculate_neigh_suppress_enabled(br);
@@ -199,7 +208,7 @@ Subject: [PATCH] net/bridge: add bridge offload
  
 --- /dev/null
 +++ b/net/bridge/br_offload.c
-@@ -0,0 +1,438 @@
+@@ -0,0 +1,458 @@
 +// SPDX-License-Identifier: GPL-2.0-only
 +#include <linux/kernel.h>
 +#include <linux/workqueue.h>
@@ -306,7 +315,7 @@ Subject: [PATCH] net/bridge: add bridge offload
 +              p->br->offload_cache_reserved) >= p->br->offload_cache_size;
 +}
 +
-+static void
++void
 +br_offload_gc_work(struct work_struct *work)
 +{
 +      struct rhashtable_iter hti;
@@ -354,36 +363,53 @@ Subject: [PATCH] net/bridge: add bridge offload
 +
 +}
 +
-+void br_offload_port_state(struct net_bridge_port *p)
++void br_offload_deferred_work(struct work_struct *work)
 +{
-+      struct net_bridge_port_offload *o = &p->offload;
-+      bool enabled = true;
-+      bool flush = false;
++      struct net_bridge_port_offload *o;
++      struct net_bridge_port *p;
++      bool enabling, enabled;
 +
-+      if (p->state != BR_STATE_FORWARDING ||
-+          !(p->flags & BR_OFFLOAD))
-+              enabled = false;
++      p = container_of(work, struct net_bridge_port, offload.deferred_work);
++      o = &p->offload;
 +
-+      spin_lock_bh(&offload_lock);
-+      if (o->enabled == enabled)
-+              goto out;
++      spin_lock_bh(&p->br->lock);
++      enabling = o->enabling;
++      enabled = o->enabled;
++      spin_unlock_bh(&p->br->lock);
++
++      if (enabling == enabled)
++              return;
 +
-+      if (enabled) {
-+              if (!o->gc_work.func)
-+                      INIT_WORK(&o->gc_work, br_offload_gc_work);
++      if (enabling) {
 +              rhashtable_init(&o->rht, &flow_params);
++              spin_lock_bh(&offload_lock);
++              o->enabled = true;
++              spin_unlock_bh(&offload_lock);
 +      } else {
-+              flush = true;
++              spin_lock_bh(&offload_lock);
++              o->enabled = false;
++              spin_unlock_bh(&offload_lock);
++              /* Ensure all rht users have finished */
++              synchronize_rcu();
++              cancel_work_sync(&o->gc_work);
 +              rhashtable_free_and_destroy(&o->rht, br_offload_destroy_cb, o);
 +      }
++}
 +
-+      o->enabled = enabled;
++void br_offload_port_state(struct net_bridge_port *p)
++{
++      struct net_bridge_port_offload *o = &p->offload;
++      bool enabling = true;
 +
-+out:
-+      spin_unlock_bh(&offload_lock);
++      if (p->state != BR_STATE_FORWARDING ||
++          !(p->flags & BR_OFFLOAD))
++              enabling = false;
++
++      if (o->enabling == enabling)
++              return;
 +
-+      if (flush)
-+              flush_work(&o->gc_work);
++      o->enabling = enabling;
++      schedule_work(&o->deferred_work);
 +}
 +
 +void br_offload_fdb_update(const struct net_bridge_fdb_entry *fdb)
@@ -475,6 +501,9 @@ Subject: [PATCH] net/bridge: add bridge offload
 +#endif
 +
 +      flow = kmem_cache_alloc(offload_cache, GFP_ATOMIC);
++      if (unlikely(!flow))
++              goto out;
++
 +      flow->port = inp;
 +      memcpy(&flow->key, &key, sizeof(key));
 +
@@ -655,20 +684,22 @@ Subject: [PATCH] net/bridge: add bridge offload
  };
  
  #define MDB_PG_FLAGS_PERMANENT        BIT(0)
-@@ -343,6 +349,12 @@ struct net_bridge_mdb_entry {
+@@ -343,6 +349,14 @@ struct net_bridge_mdb_entry {
        struct rcu_head                 rcu;
  };
  
 +struct net_bridge_port_offload {
 +      struct rhashtable               rht;
 +      struct work_struct              gc_work;
++      struct work_struct              deferred_work;
 +      bool                            enabled;
++      bool                            enabling;
 +};
 +
  struct net_bridge_port {
        struct net_bridge               *br;
        struct net_device               *dev;
-@@ -403,6 +415,7 @@ struct net_bridge_port {
+@@ -403,6 +417,7 @@ struct net_bridge_port {
        u16                             backup_redirected_cnt;
  
        struct bridge_stp_xstats        stp_xstats;
@@ -676,7 +707,7 @@ Subject: [PATCH] net/bridge: add bridge offload
  };
  
  #define kobj_to_brport(obj)   container_of(obj, struct net_bridge_port, kobj)
-@@ -519,6 +532,9 @@ struct net_bridge {
+@@ -519,6 +534,9 @@ struct net_bridge {
        struct kobject                  *ifobj;
        u32                             auto_cnt;
  
@@ -686,7 +717,7 @@ Subject: [PATCH] net/bridge: add bridge offload
  #ifdef CONFIG_NET_SWITCHDEV
        /* Counter used to make sure that hardware domains get unique
         * identifiers in case a bridge spans multiple switchdev instances.
-@@ -553,6 +569,10 @@ struct br_input_skb_cb {
+@@ -553,6 +571,10 @@ struct br_input_skb_cb {
  #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
        u8 br_netfilter_broute:1;
  #endif
@@ -699,7 +730,7 @@ Subject: [PATCH] net/bridge: add bridge offload
        /* Set if TX data plane offloading is used towards at least one
 --- /dev/null
 +++ b/net/bridge/br_private_offload.h
-@@ -0,0 +1,23 @@
+@@ -0,0 +1,33 @@
 +#ifndef __BR_OFFLOAD_H
 +#define __BR_OFFLOAD_H
 +
@@ -713,6 +744,16 @@ Subject: [PATCH] net/bridge: add bridge offload
 +                             struct netlink_ext_ack *extack);
 +int br_offload_set_cache_reserved(struct net_bridge *br, unsigned long val,
 +                                 struct netlink_ext_ack *extack);
++void br_offload_deferred_work(struct work_struct *work);
++void br_offload_gc_work(struct work_struct *work);
++
++static inline void br_offload_init_work(struct net_bridge_port *p)
++{
++      struct net_bridge_port_offload *o = &p->offload;
++
++      INIT_WORK(&o->deferred_work, br_offload_deferred_work);
++      INIT_WORK(&o->gc_work, br_offload_gc_work);
++}
 +
 +static inline void br_offload_skb_disable(struct sk_buff *skb)
 +{
-- 
2.34.1


_______________________________________________
openwrt-devel mailing list
openwrt-devel@lists.openwrt.org
https://lists.openwrt.org/mailman/listinfo/openwrt-devel

Reply via email to