From: Qingfang DENG <qingfang.d...@siflower.com.cn>

br_offload_port_state is in a spinlock's critical section (&br->lock),
but rhashtable_init/rhashtable_free_and_destroy/flush_work may sleep,
causing a scheduling-while-atomic bug.
To fix this, use workqueues to defer the init/destroy operations. To
synchronize between rhashtable insert/destroy, synchronize_rcu is used
to ensure all writers have left the RCU critical section before
rhashtable_free_and_destroy is called.

While at it, check for null pointers at the flow allocation.

Fixes: 94b4da9b4aad ("kernel: add a fast path for the bridge code")
Signed-off-by: Qingfang DENG <qingfang.d...@siflower.com.cn>
---
v2: use my work email
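Note for reviewers: the shape of the fix is the usual "defer sleeping work
out of atomic context" pattern — the spinlock'd section only allocates and
queues a request, and a workqueue performs the sleeping init/teardown later.
The sketch below is illustrative only; all demo_* names are invented for
this note and appear nowhere in the patch itself.

/*
 * Illustrative sketch of the deferral pattern used by this change
 * (hypothetical demo_* names, not part of the patch).
 */
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct demo_item {
	struct list_head list;
	bool enable;
};

struct demo_port {
	spinlock_t lock;		/* protects deferred_list and enabled */
	struct list_head deferred_list;
	struct work_struct deferred_work;
	bool enabled;
};

/* Process context: sleeping calls (rhashtable_init() etc.) are allowed. */
static void demo_work_fn(struct work_struct *work)
{
	struct demo_port *dp = container_of(work, struct demo_port,
					    deferred_work);
	struct demo_item *it;

	for (;;) {
		/* pop one queued request, if any */
		spin_lock_bh(&dp->lock);
		it = list_first_entry_or_null(&dp->deferred_list,
					      struct demo_item, list);
		if (it)
			list_del(&it->list);
		spin_unlock_bh(&dp->lock);
		if (!it)
			break;

		if (it->enable) {
			/* sleeping setup, e.g. rhashtable_init(), goes here */
			spin_lock_bh(&dp->lock);
			dp->enabled = true;
			spin_unlock_bh(&dp->lock);
		} else {
			spin_lock_bh(&dp->lock);
			dp->enabled = false;
			spin_unlock_bh(&dp->lock);
			/* wait for RCU read-side users to drain before
			 * teardown; synchronize_rcu() sleeps, which is
			 * fine here in process context
			 */
			synchronize_rcu();
			/* rhashtable_free_and_destroy() would go here */
		}
		kfree(it);
	}
}

/* Atomic context: must not sleep, so only queue the request. */
static void demo_request(struct demo_port *dp, bool enable)
{
	struct demo_item *it = kmalloc(sizeof(*it), GFP_ATOMIC);

	if (!it)
		return;		/* the real patch retries with __GFP_MEMALLOC */

	it->enable = enable;
	spin_lock_bh(&dp->lock);
	list_add_tail(&it->list, &dp->deferred_list);
	spin_unlock_bh(&dp->lock);
	schedule_work(&dp->deferred_work);
}

static void demo_port_init(struct demo_port *dp)
{
	spin_lock_init(&dp->lock);
	INIT_LIST_HEAD(&dp->deferred_list);
	INIT_WORK(&dp->deferred_work, demo_work_fn);
	dp->enabled = false;
}

The ordering in the disable branch is the point of the synchronize_rcu in
the patch: "enabled" is cleared under the lock first, so once teardown
starts no new writer can enter the table, and the grace period flushes out
the ones already inside.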
 .../hack-5.10/600-bridge_offload.patch        | 121 ++++++++++++++----
 .../hack-5.15/600-bridge_offload.patch        | 121 ++++++++++++++----
 2 files changed, 198 insertions(+), 44 deletions(-)

diff --git a/target/linux/generic/hack-5.10/600-bridge_offload.patch b/target/linux/generic/hack-5.10/600-bridge_offload.patch
index 82282627ea..1c03ac2cbd 100644
--- a/target/linux/generic/hack-5.10/600-bridge_offload.patch
+++ b/target/linux/generic/hack-5.10/600-bridge_offload.patch
@@ -153,16 +153,25 @@ Submitted-by: Felix Fietkau <n...@nbd.name>
  /*
   * Determine initial path cost based on speed.
-@@ -427,7 +428,7 @@ static struct net_bridge_port *new_nbp(s
+@@ -361,6 +362,7 @@ static void del_nbp(struct net_bridge_po
+ 	kobject_del(&p->kobj);
+ 
+ 	br_netpoll_disable(p);
++	flush_work(&p->offload.deferred_work);
+ 
+ 	call_rcu(&p->rcu, destroy_nbp_rcu);
+ }
+@@ -427,7 +429,8 @@ static struct net_bridge_port *new_nbp(s
 	p->path_cost = port_cost(dev);
 	p->priority = 0x8000 >> BR_PORT_BITS;
 	p->port_no = index;
 -	p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
 +	p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_OFFLOAD;
++	br_offload_init_work(p);
 	br_init_port(p);
 	br_set_state(p, BR_STATE_DISABLED);
 	br_stp_port_timer_init(p);
-@@ -777,6 +778,9 @@ void br_port_flags_change(struct net_bri
+@@ -777,6 +780,9 @@ void br_port_flags_change(struct net_bri
 	if (mask & BR_NEIGH_SUPPRESS)
 		br_recalculate_neigh_suppress_enabled(br);
 
@@ -202,7 +211,7 @@ Submitted-by: Felix Fietkau <n...@nbd.name>
 					nbp_vlan_group_rcu(p)))
 --- /dev/null
 +++ b/net/bridge/br_offload.c
-@@ -0,0 +1,436 @@
+@@ -0,0 +1,491 @@
 +// SPDX-License-Identifier: GPL-2.0-only
 +#include <linux/kernel.h>
 +#include <linux/workqueue.h>
@@ -236,6 +245,11 @@ Submitted-by: Felix Fietkau <n...@nbd.name>
 +	struct rcu_head rcu;
 +};
 +
++struct bridge_offload_deferred_item {
++	struct list_head list;
++	bool enable;
++};
++
 +static const struct rhashtable_params flow_params = {
 +	.automatic_shrinking = true,
 +	.head_offset = offsetof(struct bridge_flow, node),
@@ -309,7 +323,7 @@ Submitted-by: Felix Fietkau <n...@nbd.name>
 +	       p->br->offload_cache_reserved) >= p->br->offload_cache_size;
 +}
 +
-+static void
++void
 +br_offload_gc_work(struct work_struct *work)
 +{
 +	struct rhashtable_iter hti;
@@ -357,11 +371,57 @@ Submitted-by: Felix Fietkau <n...@nbd.name>
 +
 +}
 +
++static struct bridge_offload_deferred_item *
++br_offload_deferred_dequeue(struct net_bridge_port *p)
++{
++	struct bridge_offload_deferred_item *bi = NULL;
++
++	spin_lock_bh(&p->br->lock);
++	if (list_empty(&p->offload.deferred_list))
++		goto out;
++
++	bi = list_first_entry(&p->offload.deferred_list,
++			      struct bridge_offload_deferred_item, list);
++	list_del(&bi->list);
++out:
++	spin_unlock_bh(&p->br->lock);
++	return bi;
++}
++
++void br_offload_deferred_work(struct work_struct *work)
++{
++	struct bridge_offload_deferred_item *bi;
++	struct net_bridge_port_offload *o;
++	struct net_bridge_port *p;
++
++	p = container_of(work, struct net_bridge_port, offload.deferred_work);
++	o = &p->offload;
++	while ((bi = br_offload_deferred_dequeue(p)) != NULL) {
++		if (bi->enable) {
++			rhashtable_init(&o->rht, &flow_params);
++			spin_lock_bh(&offload_lock);
++			o->enabled = true;
++			spin_unlock_bh(&offload_lock);
++		} else {
++			spin_lock_bh(&offload_lock);
++			o->enabled = false;
++			spin_unlock_bh(&offload_lock);
++			/* Ensure all rht users have finished */
++			synchronize_rcu();
++			cancel_work_sync(&o->gc_work);
++			rhashtable_free_and_destroy(&o->rht,
++						    br_offload_destroy_cb, o);
++		}
++		kfree(bi);
++	}
++}
++
 +void br_offload_port_state(struct net_bridge_port *p)
 +{
 +	struct net_bridge_port_offload *o = &p->offload;
++	struct bridge_offload_deferred_item *bi;
++	gfp_t gfp = GFP_ATOMIC;
 +	bool enabled = true;
-+	bool flush = false;
 +
 +	if (p->state != BR_STATE_FORWARDING ||
 +	    !(p->flags & BR_OFFLOAD))
@@ -371,22 +431,23 @@ Submitted-by: Felix Fietkau <n...@nbd.name>
 +	if (o->enabled == enabled)
 +		goto out;
 +
-+	if (enabled) {
-+		if (!o->gc_work.func)
-+			INIT_WORK(&o->gc_work, br_offload_gc_work);
-+		rhashtable_init(&o->rht, &flow_params);
-+	} else {
-+		flush = true;
-+		rhashtable_free_and_destroy(&o->rht, br_offload_destroy_cb, o);
-+	}
++	bi = kmalloc(sizeof(*bi), gfp);
++	if (!bi) {
++		if (enabled)
++			goto out;
 +
-+	o->enabled = enabled;
++		/* If we are disabling (freeing the rht), retry with
++		 * __GFP_MEMALLOC. Panic if that also fails.
++		 */
++		gfp |= __GFP_MEMALLOC | __GFP_NOWARN;
++		bi = kmalloc(sizeof(*bi), gfp);
++	}
 +
++	bi->enable = enabled;
++	list_add_tail(&bi->list, &o->deferred_list);
++	schedule_work(&o->deferred_work);
 +out:
 +	spin_unlock_bh(&offload_lock);
-+
-+	if (flush)
-+		flush_work(&o->gc_work);
 +}
 +
 +void br_offload_fdb_update(const struct net_bridge_fdb_entry *fdb)
@@ -478,6 +539,9 @@ Submitted-by: Felix Fietkau <n...@nbd.name>
 +#endif
 +
 +	flow = kmem_cache_alloc(offload_cache, GFP_ATOMIC);
++	if (unlikely(!flow))
++		goto out;
++
 +	flow->port = inp;
 +	memcpy(&flow->key, &key, sizeof(key));
 +
@@ -656,20 +720,22 @@ Submitted-by: Felix Fietkau <n...@nbd.name>
 	};
 
 	#define MDB_PG_FLAGS_PERMANENT	BIT(0)
-@@ -280,6 +286,12 @@ struct net_bridge_mdb_entry {
+@@ -280,6 +286,14 @@ struct net_bridge_mdb_entry {
 	struct rcu_head rcu;
 	};
 
 +struct net_bridge_port_offload {
 +	struct rhashtable rht;
 +	struct work_struct gc_work;
++	struct work_struct deferred_work;
++	struct list_head deferred_list;
 +	bool enabled;
 +};
 +
 	struct net_bridge_port {
 	struct net_bridge	*br;
 	struct net_device	*dev;
-@@ -337,6 +349,7 @@ struct net_bridge_port {
+@@ -337,6 +351,7 @@ struct net_bridge_port {
 	u16			backup_redirected_cnt;
 
 	struct bridge_stp_xstats stp_xstats;
 +	struct net_bridge_port_offload	offload;
 	};
 
 	#define kobj_to_brport(obj)	container_of(obj, struct net_bridge_port, kobj)
-@@ -475,6 +488,9 @@ struct net_bridge {
+@@ -475,6 +490,9 @@ struct net_bridge {
 	struct kobject			*ifobj;
 	u32				auto_cnt;
 
 +	unsigned long			offload_cache_size;
 +	unsigned long			offload_cache_reserved;
 +
 	#ifdef CONFIG_NET_SWITCHDEV
 	int offload_fwd_mark;
 	#endif
-@@ -501,6 +517,10 @@ struct br_input_skb_cb {
+@@ -501,6 +519,10 @@ struct br_input_skb_cb {
 	#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
 	u8 br_netfilter_broute:1;
 	#endif
 +
 	int offload_fwd_mark;
 --- /dev/null
 +++ b/net/bridge/br_private_offload.h
-@@ -0,0 +1,21 @@
+@@ -0,0 +1,32 @@
 +#ifndef __BR_OFFLOAD_H
 +#define __BR_OFFLOAD_H
 +
 +void br_offload_init(void);
 +void br_offload_fini(void);
 +int br_offload_set_cache_size(struct net_bridge *br, unsigned long val);
 +int br_offload_set_cache_reserved(struct net_bridge *br, unsigned long val);
++void br_offload_deferred_work(struct work_struct *work);
++void br_offload_gc_work(struct work_struct *work);
++
++static inline void br_offload_init_work(struct net_bridge_port *p)
++{
++	struct net_bridge_port_offload *o = &p->offload;
++
++	INIT_LIST_HEAD(&o->deferred_list);
++	INIT_WORK(&o->deferred_work, br_offload_deferred_work);
++	INIT_WORK(&o->gc_work, br_offload_gc_work);
++}
 +
 +static inline void br_offload_skb_disable(struct sk_buff *skb)
 +{
diff --git a/target/linux/generic/hack-5.15/600-bridge_offload.patch b/target/linux/generic/hack-5.15/600-bridge_offload.patch
index 9d71a741b2..118df44fec 100644
--- a/target/linux/generic/hack-5.15/600-bridge_offload.patch
+++ b/target/linux/generic/hack-5.15/600-bridge_offload.patch
@@ -150,16 +150,25 @@ Subject: [PATCH] net/bridge: add bridge offload
  /*
   * Determine initial path cost based on speed.
-@@ -428,7 +429,7 @@ static struct net_bridge_port *new_nbp(s
+@@ -362,6 +363,7 @@ static void del_nbp(struct net_bridge_po
+ 	kobject_del(&p->kobj);
+ 
+ 	br_netpoll_disable(p);
++	flush_work(&p->offload.deferred_work);
+ 
+ 	call_rcu(&p->rcu, destroy_nbp_rcu);
+ }
+@@ -428,7 +430,8 @@ static struct net_bridge_port *new_nbp(s
 	p->path_cost = port_cost(dev);
 	p->priority = 0x8000 >> BR_PORT_BITS;
 	p->port_no = index;
 -	p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
 +	p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD | BR_OFFLOAD;
++	br_offload_init_work(p);
 	br_init_port(p);
 	br_set_state(p, BR_STATE_DISABLED);
 	br_stp_port_timer_init(p);
-@@ -771,6 +772,9 @@ void br_port_flags_change(struct net_bri
+@@ -771,6 +774,9 @@ void br_port_flags_change(struct net_bri
 	if (mask & BR_NEIGH_SUPPRESS)
 		br_recalculate_neigh_suppress_enabled(br);
 
@@ -199,7 +208,7 @@ Subject: [PATCH] net/bridge: add bridge offload
 --- /dev/null
 +++ b/net/bridge/br_offload.c
-@@ -0,0 +1,438 @@
+@@ -0,0 +1,493 @@
 +// SPDX-License-Identifier: GPL-2.0-only
 +#include <linux/kernel.h>
 +#include <linux/workqueue.h>
@@ -233,6 +242,11 @@ Subject: [PATCH] net/bridge: add bridge offload
 +	struct rcu_head rcu;
 +};
 +
++struct bridge_offload_deferred_item {
++	struct list_head list;
++	bool enable;
++};
++
 +static const struct rhashtable_params flow_params = {
 +	.automatic_shrinking = true,
 +	.head_offset = offsetof(struct bridge_flow, node),
@@ -306,7 +320,7 @@ Subject: [PATCH] net/bridge: add bridge offload
 +	       p->br->offload_cache_reserved) >= p->br->offload_cache_size;
 +}
 +
-+static void
++void
 +br_offload_gc_work(struct work_struct *work)
 +{
 +	struct rhashtable_iter hti;
@@ -354,11 +368,57 @@ Subject: [PATCH] net/bridge: add bridge offload
 +
 +}
 +
++static struct bridge_offload_deferred_item *
++br_offload_deferred_dequeue(struct net_bridge_port *p)
++{
++	struct bridge_offload_deferred_item *bi = NULL;
++
++	spin_lock_bh(&p->br->lock);
++	if (list_empty(&p->offload.deferred_list))
++		goto out;
++
++	bi = list_first_entry(&p->offload.deferred_list,
++			      struct bridge_offload_deferred_item, list);
++	list_del(&bi->list);
++out:
++	spin_unlock_bh(&p->br->lock);
++	return bi;
++}
++
++void br_offload_deferred_work(struct work_struct *work)
++{
++	struct bridge_offload_deferred_item *bi;
++	struct net_bridge_port_offload *o;
++	struct net_bridge_port *p;
++
++	p = container_of(work, struct net_bridge_port, offload.deferred_work);
++	o = &p->offload;
++	while ((bi = br_offload_deferred_dequeue(p)) != NULL) {
++		if (bi->enable) {
++			rhashtable_init(&o->rht, &flow_params);
++			spin_lock_bh(&offload_lock);
++			o->enabled = true;
++			spin_unlock_bh(&offload_lock);
++		} else {
++			spin_lock_bh(&offload_lock);
++			o->enabled = false;
++			spin_unlock_bh(&offload_lock);
++			/* Ensure all rht users have finished */
++			synchronize_rcu();
++			cancel_work_sync(&o->gc_work);
++			rhashtable_free_and_destroy(&o->rht,
++						    br_offload_destroy_cb, o);
++		}
++		kfree(bi);
++	}
++}
++
 +void br_offload_port_state(struct net_bridge_port *p)
 +{
 +	struct net_bridge_port_offload *o = &p->offload;
++	struct bridge_offload_deferred_item *bi;
++	gfp_t gfp = GFP_ATOMIC;
 +	bool enabled = true;
-+	bool flush = false;
 +
 +	if (p->state != BR_STATE_FORWARDING ||
 +	    !(p->flags & BR_OFFLOAD))
@@ -368,22 +428,23 @@ Subject: [PATCH] net/bridge: add bridge offload
 +	if (o->enabled == enabled)
 +		goto out;
 +
-+	if (enabled) {
-+		if (!o->gc_work.func)
-+			INIT_WORK(&o->gc_work, br_offload_gc_work);
-+		rhashtable_init(&o->rht, &flow_params);
-+	} else {
-+		flush = true;
-+		rhashtable_free_and_destroy(&o->rht, br_offload_destroy_cb, o);
-+	}
++	bi = kmalloc(sizeof(*bi), gfp);
++	if (!bi) {
++		if (enabled)
++			goto out;
 +
-+	o->enabled = enabled;
++		/* If we are disabling (freeing the rht), retry with
++		 * __GFP_MEMALLOC. Panic if that also fails.
++		 */
++		gfp |= __GFP_MEMALLOC | __GFP_NOWARN;
++		bi = kmalloc(sizeof(*bi), gfp);
++	}
 +
++	bi->enable = enabled;
++	list_add_tail(&bi->list, &o->deferred_list);
++	schedule_work(&o->deferred_work);
 +out:
 +	spin_unlock_bh(&offload_lock);
-+
-+	if (flush)
-+		flush_work(&o->gc_work);
 +}
 +
 +void br_offload_fdb_update(const struct net_bridge_fdb_entry *fdb)
@@ -475,6 +536,9 @@ Subject: [PATCH] net/bridge: add bridge offload
 +#endif
 +
 +	flow = kmem_cache_alloc(offload_cache, GFP_ATOMIC);
++	if (unlikely(!flow))
++		goto out;
++
 +	flow->port = inp;
 +	memcpy(&flow->key, &key, sizeof(key));
 +
@@ -655,20 +719,22 @@ Subject: [PATCH] net/bridge: add bridge offload
 	};
 
 	#define MDB_PG_FLAGS_PERMANENT	BIT(0)
-@@ -343,6 +349,12 @@ struct net_bridge_mdb_entry {
+@@ -343,6 +349,14 @@ struct net_bridge_mdb_entry {
 	struct rcu_head rcu;
 	};
 
 +struct net_bridge_port_offload {
 +	struct rhashtable rht;
 +	struct work_struct gc_work;
++	struct work_struct deferred_work;
++	struct list_head deferred_list;
 +	bool enabled;
 +};
 +
 	struct net_bridge_port {
 	struct net_bridge	*br;
 	struct net_device	*dev;
-@@ -403,6 +415,7 @@ struct net_bridge_port {
+@@ -403,6 +417,7 @@ struct net_bridge_port {
 	u16			backup_redirected_cnt;
 
 	struct bridge_stp_xstats stp_xstats;
 +	struct net_bridge_port_offload	offload;
 	};
 
 	#define kobj_to_brport(obj)	container_of(obj, struct net_bridge_port, kobj)
-@@ -519,6 +532,9 @@ struct net_bridge {
+@@ -519,6 +534,9 @@ struct net_bridge {
 	struct kobject			*ifobj;
 	u32				auto_cnt;
 
 +	unsigned long			offload_cache_size;
 +	unsigned long			offload_cache_reserved;
 +
 	#ifdef CONFIG_NET_SWITCHDEV
 	/* Counter used to make sure that hardware domains get unique
 	 * identifiers in case a bridge spans multiple switchdev instances.
-@@ -553,6 +569,10 @@ struct br_input_skb_cb {
+@@ -553,6 +571,10 @@ struct br_input_skb_cb {
 	#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
 	u8 br_netfilter_broute:1;
 	#endif
 +
 	/* Set if TX data plane offloading is used towards at least one
 --- /dev/null
 +++ b/net/bridge/br_private_offload.h
-@@ -0,0 +1,23 @@
+@@ -0,0 +1,34 @@
 +#ifndef __BR_OFFLOAD_H
 +#define __BR_OFFLOAD_H
 +
 +int br_offload_set_cache_size(struct net_bridge *br, unsigned long val,
 +			      struct netlink_ext_ack *extack);
 +int br_offload_set_cache_reserved(struct net_bridge *br, unsigned long val,
 +				  struct netlink_ext_ack *extack);
++void br_offload_deferred_work(struct work_struct *work);
++void br_offload_gc_work(struct work_struct *work);
++
++static inline void br_offload_init_work(struct net_bridge_port *p)
++{
++	struct net_bridge_port_offload *o = &p->offload;
++
++	INIT_LIST_HEAD(&o->deferred_list);
++	INIT_WORK(&o->deferred_work, br_offload_deferred_work);
++	INIT_WORK(&o->gc_work, br_offload_gc_work);
++}
 +
 +static inline void br_offload_skb_disable(struct sk_buff *skb)
 +{
-- 
2.34.1

_______________________________________________
openwrt-devel mailing list
openwrt-devel@lists.openwrt.org
https://lists.openwrt.org/mailman/listinfo/openwrt-devel