Plan B: 1. Use p->br (the back pointer) as the RCU sentinel 2. Keep dev->br_port set until after the RCU transition 3. Order operations so we don't have to worry about p->br being NULL in the case of STP timers.
Untested. Probably excessive use of rcu() macros. Since it is only needed in cases where no locks held (ie. !(RTNL | br_lock)) --- br-fix.orig/net/bridge/br_fdb.c +++ br-fix/net/bridge/br_fdb.c @@ -67,9 +67,9 @@ static __inline__ void fdb_delete(struct br_fdb_put(f); } -void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) +void br_fdb_changeaddr(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *newaddr) { - struct net_bridge *br = p->br; int i; spin_lock_bh(&br->hash_lock); --- br-fix.orig/net/bridge/br_if.c +++ br-fix/net/bridge/br_if.c @@ -79,24 +79,31 @@ static int port_cost(struct net_device * */ static void port_carrier_check(void *arg) { - struct net_bridge_port *p = arg; + struct net_device *dev = arg; + struct net_bridge_port *p; + struct net_bridge *br; rtnl_lock(); - if (netif_carrier_ok(p->dev)) { - u32 cost = port_cost(p->dev); + if ((p = dev->br_port) == NULL || + (br = rcu_dereference(p->br)) == NULL) + goto done; - spin_lock_bh(&p->br->lock); + if (netif_carrier_ok(dev)) { + u32 cost = port_cost(dev); + + spin_lock_bh(&br->lock); if (p->state == BR_STATE_DISABLED) { p->path_cost = cost; br_stp_enable_port(p); } - spin_unlock_bh(&p->br->lock); + spin_unlock_bh(&br->lock); } else { - spin_lock_bh(&p->br->lock); + spin_lock_bh(&br->lock); if (p->state != BR_STATE_DISABLED) br_stp_disable_port(p); - spin_unlock_bh(&p->br->lock); + spin_unlock_bh(&br->lock); } +done: rtnl_unlock(); } @@ -104,7 +111,7 @@ static void destroy_nbp(struct net_bridg { struct net_device *dev = p->dev; - p->br = NULL; + dev->br_port = NULL; p->dev = NULL; dev_put(dev); @@ -119,29 +126,25 @@ static void destroy_nbp_rcu(struct rcu_h } /* called with RTNL */ -static void del_nbp(struct net_bridge_port *p) +static void del_nbp(struct net_bridge *br, struct net_bridge_port *p) { - struct net_bridge *br = p->br; - struct net_device *dev = p->dev; - - dev->br_port = NULL; - dev_set_promiscuity(dev, -1); + dev_set_promiscuity(p->dev, -1); cancel_delayed_work(&p->carrier_check); - flush_scheduled_work(); spin_lock_bh(&br->lock); br_stp_disable_port(p); + list_del_rcu(&p->list); spin_unlock_bh(&br->lock); br_fdb_delete_by_port(br, p); - list_del_rcu(&p->list); - del_timer_sync(&p->message_age_timer); del_timer_sync(&p->forward_delay_timer); del_timer_sync(&p->hold_timer); + rcu_assign_pointer(p->br, NULL); + call_rcu(&p->rcu, destroy_nbp_rcu); } @@ -152,7 +155,7 @@ static void del_br(struct net_bridge *br list_for_each_entry_safe(p, n, &br->port_list, list) { br_sysfs_removeif(p); - del_nbp(p); + del_nbp(br, p); } del_timer_sync(&br->gc_timer); @@ -249,7 +252,7 @@ static struct net_bridge_port *new_nbp(s p->port_no = index; br_init_port(p); p->state = BR_STATE_DISABLED; - INIT_WORK(&p->carrier_check, port_carrier_check, p); + INIT_WORK(&p->carrier_check, port_carrier_check, dev); kobject_init(&p->kobj); return p; @@ -386,7 +389,7 @@ int br_add_if(struct net_bridge *br, str destroy_nbp(p); else if ((err = br_sysfs_addif(p))) - del_nbp(p); + del_nbp(br, p); else { dev_set_promiscuity(dev, 1); @@ -415,7 +418,7 @@ int br_del_if(struct net_bridge *br, str return -EINVAL; br_sysfs_removeif(p); - del_nbp(p); + del_nbp(br, p); spin_lock_bh(&br->lock); br_stp_recalculate_bridge_id(br); --- br-fix.orig/net/bridge/br_input.c +++ br-fix/net/bridge/br_input.c @@ -46,17 +46,19 @@ int br_handle_frame_finish(struct sk_buf { const unsigned char *dest = eth_hdr(skb)->h_dest; struct net_bridge_port *p = skb->dev->br_port; - struct net_bridge *br = p->br; + struct net_bridge *br; struct net_bridge_fdb_entry *dst; int passedup = 0; + br = rcu_dereference(p->br); + if (!br) + goto drop; + /* insert into forwarding database after filtering to avoid spoofing */ - br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + br_fdb_update(br, p, eth_hdr(skb)->h_source); - if (p->state == BR_STATE_LEARNING) { - kfree_skb(skb); - goto out; - } + if (p->state == BR_STATE_LEARNING) + goto drop; if (br->dev->flags & IFF_PROMISC) { struct sk_buff *skb2; @@ -72,26 +74,23 @@ int br_handle_frame_finish(struct sk_buf br_flood_forward(br, skb, !passedup); if (!passedup) br_pass_frame_up(br, skb); - goto out; } - - dst = __br_fdb_get(br, dest); - if (dst != NULL && dst->is_local) { - if (!passedup) + else if ((dst = __br_fdb_get(br, dest)) != NULL) { + if (dst->is_local) { + if (passedup) + goto drop; br_pass_frame_up(br, skb); + } else - kfree_skb(skb); - goto out; + br_forward(dst->dst, skb); } + else + br_flood_forward(br, skb, 0); - if (dst != NULL) { - br_forward(dst->dst, skb); - goto out; - } - - br_flood_forward(br, skb, 0); + return 0; -out: +drop: + kfree_skb(skb); return 0; } @@ -105,14 +104,15 @@ int br_handle_frame(struct net_bridge_po { struct sk_buff *skb = *pskb; const unsigned char *dest = eth_hdr(skb)->h_dest; + struct net_bridge *br = rcu_dereference(p->br); - if (p->state == BR_STATE_DISABLED) + if (p->state == BR_STATE_DISABLED || !br) goto err; if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto err; - if (p->br->stp_enabled && + if (br->stp_enabled && !memcmp(dest, bridge_ula, 5) && !(dest[5] & 0xF0)) { if (!dest[5]) { @@ -131,7 +131,7 @@ int br_handle_frame(struct net_bridge_po dest = eth_hdr(skb)->h_dest; } - if (!compare_ether_addr(p->br->dev->dev_addr, dest)) + if (!compare_ether_addr(br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, --- br-fix.orig/net/bridge/br_notify.c +++ br-fix/net/bridge/br_notify.c @@ -39,7 +39,12 @@ static int br_device_event(struct notifi if (p == NULL) return NOTIFY_DONE; - br = p->br; + rcu_read_lock(); + br = rcu_dereference(p->br); + + /* lost race with br_del_if and RCU */ + if (br == NULL) + goto done; spin_lock_bh(&br->lock); switch (event) { @@ -48,7 +53,7 @@ static int br_device_event(struct notifi break; case NETDEV_CHANGEADDR: - br_fdb_changeaddr(p, dev->dev_addr); + br_fdb_changeaddr(br, p, dev->dev_addr); br_stp_recalculate_bridge_id(br); break; @@ -84,5 +89,6 @@ static int br_device_event(struct notifi spin_unlock_bh(&br->lock); done: + rcu_read_unlock(); return NOTIFY_DONE; } --- br-fix.orig/net/bridge/br_private.h +++ br-fix/net/bridge/br_private.h @@ -138,7 +138,7 @@ extern int br_dev_xmit(struct sk_buff *s /* br_fdb.c */ extern void br_fdb_init(void); extern void br_fdb_fini(void); -extern void br_fdb_changeaddr(struct net_bridge_port *p, +extern void br_fdb_changeaddr(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *newaddr); extern void br_fdb_cleanup(unsigned long arg); extern void br_fdb_delete_by_port(struct net_bridge *br, @@ -199,7 +199,8 @@ extern void br_log_state(const struct ne extern struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no); extern void br_init_port(struct net_bridge_port *p); -extern void br_become_designated_port(struct net_bridge_port *p); +extern void br_become_designated_port(struct net_bridge *br, + struct net_bridge_port *p); /* br_stp_if.c */ extern void br_stp_enable_bridge(struct net_bridge *br); --- br-fix.orig/net/bridge/br_private_stp.h +++ br-fix/net/bridge/br_private_stp.h @@ -45,8 +45,11 @@ extern void br_become_root_bridge(struct extern void br_config_bpdu_generation(struct net_bridge *); extern void br_configuration_update(struct net_bridge *); extern void br_port_state_selection(struct net_bridge *); -extern void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu); -extern void br_received_tcn_bpdu(struct net_bridge_port *p); +extern void br_received_config_bpdu(struct net_bridge *br, + struct net_bridge_port *p, + const struct br_config_bpdu *bpdu); +extern void br_received_tcn_bpdu(struct net_bridge *br, + struct net_bridge_port *p); extern void br_transmit_config(struct net_bridge_port *p); extern void br_transmit_tcn(struct net_bridge *br); extern void br_topology_change_detection(struct net_bridge *br); --- br-fix.orig/net/bridge/br_stp.c +++ br-fix/net/bridge/br_stp.c @@ -53,14 +53,13 @@ struct net_bridge_port *br_get_port(stru } /* called under bridge lock */ -static int br_should_become_root_port(const struct net_bridge_port *p, +static int br_should_become_root_port(struct net_bridge *br, + const struct net_bridge_port *p, u16 root_port) { - struct net_bridge *br; struct net_bridge_port *rp; int t; - br = p->br; if (p->state == BR_STATE_DISABLED || br_is_designated_port(p)) return 0; @@ -110,7 +109,7 @@ static void br_root_selection(struct net u16 root_port = 0; list_for_each_entry(p, &br->port_list, list) { - if (br_should_become_root_port(p, root_port)) + if (br_should_become_root_port(br, p, root_port)) root_port = p->port_no; } @@ -148,7 +147,6 @@ void br_transmit_config(struct net_bridg struct br_config_bpdu bpdu; struct net_bridge *br; - if (timer_pending(&p->hold_timer)) { p->config_pending = 1; return; @@ -184,7 +182,8 @@ void br_transmit_config(struct net_bridg } /* called under bridge lock */ -static inline void br_record_config_information(struct net_bridge_port *p, +static inline void br_record_config_information(struct net_bridge *br, + struct net_bridge_port *p, const struct br_config_bpdu *bpdu) { p->designated_root = bpdu->root; @@ -193,7 +192,7 @@ static inline void br_record_config_info p->designated_port = bpdu->port_id; mod_timer(&p->message_age_timer, jiffies - + (p->br->max_age - bpdu->message_age)); + + (br->max_age - bpdu->message_age)); } /* called under bridge lock */ @@ -213,12 +212,11 @@ void br_transmit_tcn(struct net_bridge * } /* called under bridge lock */ -static int br_should_become_designated_port(const struct net_bridge_port *p) +static int br_should_become_designated_port(const struct net_bridge *br, + const struct net_bridge_port *p) { - struct net_bridge *br; int t; - br = p->br; if (br_is_designated_port(p)) return 1; @@ -249,14 +247,16 @@ static void br_designated_port_selection list_for_each_entry(p, &br->port_list, list) { if (p->state != BR_STATE_DISABLED && - br_should_become_designated_port(p)) - br_become_designated_port(p); + br_should_become_designated_port(br, p)) + br_become_designated_port(br,p); } } /* called under bridge lock */ -static int br_supersedes_port_info(struct net_bridge_port *p, struct br_config_bpdu *bpdu) +static int br_supersedes_port_info(const struct net_bridge *br, + const struct net_bridge_port *p, + const struct br_config_bpdu *bpdu) { int t; @@ -277,7 +277,7 @@ static int br_supersedes_port_info(struc else if (t > 0) return 0; - if (memcmp(&bpdu->bridge_id, &p->br->bridge_id, 8)) + if (memcmp(&bpdu->bridge_id, &br->bridge_id, 8)) return 1; if (bpdu->port_id <= p->designated_port) @@ -339,11 +339,8 @@ void br_configuration_update(struct net_ } /* called under bridge lock */ -void br_become_designated_port(struct net_bridge_port *p) +void br_become_designated_port(struct net_bridge *br, struct net_bridge_port *p) { - struct net_bridge *br; - - br = p->br; p->designated_root = br->designated_root; p->designated_cost = br->root_path_cost; p->designated_bridge = br->bridge_id; @@ -367,16 +364,16 @@ static void br_make_blocking(struct net_ } /* called under bridge lock */ -static void br_make_forwarding(struct net_bridge_port *p) +static void br_make_forwarding(struct net_bridge *br, struct net_bridge_port *p) { if (p->state == BR_STATE_BLOCKING) { - if (p->br->stp_enabled) { + if (br->stp_enabled) p->state = BR_STATE_LISTENING; - } else { + else p->state = BR_STATE_LEARNING; - } br_log_state(p); - mod_timer(&p->forward_delay_timer, jiffies + p->br->forward_delay); } + mod_timer(&p->forward_delay_timer, jiffies + p->br->forward_delay); + } } /* called under bridge lock */ @@ -389,10 +386,10 @@ void br_port_state_selection(struct net_ if (p->port_no == br->root_port) { p->config_pending = 0; p->topology_change_ack = 0; - br_make_forwarding(p); + br_make_forwarding(br, p); } else if (br_is_designated_port(p)) { del_timer(&p->message_age_timer); - br_make_forwarding(p); + br_make_forwarding(br, p); } else { p->config_pending = 0; p->topology_change_ack = 0; @@ -411,16 +408,13 @@ static inline void br_topology_change_ac } /* called under bridge lock */ -void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu) +void br_received_config_bpdu(struct net_bridge *br, struct net_bridge_port *p, + const struct br_config_bpdu *bpdu) { - struct net_bridge *br; - int was_root; - - br = p->br; - was_root = br_is_root_bridge(br); + int was_root = br_is_root_bridge(br); - if (br_supersedes_port_info(p, bpdu)) { - br_record_config_information(p, bpdu); + if (br_supersedes_port_info(br, p, bpdu)) { + br_record_config_information(br, p, bpdu); br_configuration_update(br); br_port_state_selection(br); @@ -447,13 +441,13 @@ void br_received_config_bpdu(struct net_ } /* called under bridge lock */ -void br_received_tcn_bpdu(struct net_bridge_port *p) +void br_received_tcn_bpdu(struct net_bridge *br, struct net_bridge_port *p) { if (br_is_designated_port(p)) { pr_info("%s: received tcn bpdu on port %i(%s)\n", - p->br->dev->name, p->port_no, p->dev->name); + br->dev->name, p->port_no, p->dev->name); - br_topology_change_detection(p->br); + br_topology_change_detection(br); br_topology_change_acknowledge(p); } } --- br-fix.orig/net/bridge/br_stp_bpdu.c +++ br-fix/net/bridge/br_stp_bpdu.c @@ -24,11 +24,12 @@ static void br_send_bpdu(struct net_bridge_port *p, unsigned char *data, int length) { + struct net_bridge *br = rcu_dereference(p->br); struct net_device *dev; struct sk_buff *skb; int size; - if (!p->br->stp_enabled) + if (!br || !br->stp_enabled) return; size = length + 2*ETH_ALEN + 2; @@ -137,11 +138,15 @@ static const unsigned char header[6] = { int br_stp_handle_bpdu(struct sk_buff *skb) { struct net_bridge_port *p = skb->dev->br_port; - struct net_bridge *br = p->br; + struct net_bridge *br; unsigned char *buf; + br = rcu_dereference(p->br); + if (!br) + goto err; + /* insert into forwarding database after filtering to avoid spoofing */ - br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + br_fdb_update(br, p, eth_hdr(skb)->h_source); /* need at least the 802 and STP headers */ if (!pskb_may_pull(skb, sizeof(header)+1) || @@ -194,11 +199,11 @@ int br_stp_handle_bpdu(struct sk_buff *s bpdu.hello_time = br_get_ticks(buf+28); bpdu.forward_delay = br_get_ticks(buf+30); - br_received_config_bpdu(p, &bpdu); + br_received_config_bpdu(br, p, &bpdu); } else if (buf[0] == BPDU_TYPE_TCN) { - br_received_tcn_bpdu(p); + br_received_tcn_bpdu(br, p); } out: spin_unlock_bh(&br->lock); --- br-fix.orig/net/bridge/br_stp_if.c +++ br-fix/net/bridge/br_stp_if.c @@ -35,7 +35,7 @@ static inline port_id br_make_port_id(__ void br_init_port(struct net_bridge_port *p) { p->port_id = br_make_port_id(p->priority, p->port_no); - br_become_designated_port(p); + br_become_designated_port(p->br, p); p->state = BR_STATE_BLOCKING; p->topology_change_ack = 0; p->config_pending = 0; @@ -102,7 +102,7 @@ void br_stp_disable_port(struct net_brid br->dev->name, p->port_no, p->dev->name, "disabled"); wasroot = br_is_root_bridge(br); - br_become_designated_port(p); + br_become_designated_port(br, p); p->state = BR_STATE_DISABLED; p->topology_change_ack = 0; p->config_pending = 0; @@ -194,6 +194,7 @@ void br_stp_set_bridge_priority(struct n /* called under bridge lock */ void br_stp_set_port_priority(struct net_bridge_port *p, u8 newprio) { + struct net_bridge *br = p->br; port_id new_port_id = br_make_port_id(newprio, p->port_no); if (br_is_designated_port(p)) @@ -201,10 +202,10 @@ void br_stp_set_port_priority(struct net p->port_id = new_port_id; p->priority = newprio; - if (!memcmp(&p->br->bridge_id, &p->designated_bridge, 8) && + if (!memcmp(&br->bridge_id, &p->designated_bridge, 8) && p->port_id < p->designated_port) { - br_become_designated_port(p); - br_port_state_selection(p->br); + br_become_designated_port(br, p); + br_port_state_selection(br); } } --- br-fix.orig/net/bridge/br_stp_timer.c +++ br-fix/net/bridge/br_stp_timer.c @@ -76,7 +76,7 @@ static void br_message_age_timer_expired goto unlock; was_root = br_is_root_bridge(br); - br_become_designated_port(p); + br_become_designated_port(br, p); br_configuration_update(br); br_port_state_selection(br); if (br_is_root_bridge(br) && !was_root) --- br-fix.orig/net/bridge/br_sysfs_if.c +++ br-fix/net/bridge/br_sysfs_if.c @@ -173,6 +173,7 @@ static ssize_t brport_store(struct kobje { struct brport_attribute * brport_attr = to_brport_attr(attr); struct net_bridge_port * p = to_brport(kobj); + struct net_bridge *br; ssize_t ret = -EINVAL; char *endp; unsigned long val; @@ -183,10 +184,11 @@ static ssize_t brport_store(struct kobje val = simple_strtoul(buf, &endp, 0); if (endp != buf) { rtnl_lock(); - if (p->dev && p->br && brport_attr->store) { - spin_lock_bh(&p->br->lock); + br = rcu_dereference(p->br); + if (p->dev && br && brport_attr->store) { + spin_lock_bh(&br->lock); ret = brport_attr->store(p, val); - spin_unlock_bh(&p->br->lock); + spin_unlock_bh(&br->lock); if (ret == 0) ret = count; } - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html