* jamal <[EMAIL PROTECTED]> 2006-06-29 21:11 > Heres what it would look at ingress: > > step 0: coming from wire via eth0, > dev=eth0, input_dev=eth0 > > step 1: redirect to ifb0, leaving redirect > dev=ifb0, input_dev=eth0 > > step 2: leaving ifb0, coming back to ingress side of stack > > dev= eth0, input_dev=ifb0
That creates a nice loop on ingress. Upon reentering the stack with skb->dev set to eth0 again we'll go through the same ingress filters as the first time and we'll hit ifb0 again over and over. Are you suggesting everyone has to insert a pass action matching input_dev in order to escape the loop when using ifb? > > When leaving ifb0 you want for... > > ... egress: > > skb->dev=to (eth0) skb->iif=from (ifb0) > > ... ingress: > > skb->dev=at (ifb0) skb->iif=from (eth0) > > > > Yes, this is correct. I described the flow of the first one in the > earlier email and the ingress side. How can it be correct if it differs from your description above? What I described is what the patch changes it to. Looking closer at ifb it contains a race when updating skb->dev. Preempt has to be disabled when updating skb->dev before calling netif_rx() otherwise the device might disappear. [NET]: Use interface index to keep input device information Using the interface index instead of a direct reference allows a safe usage beyond the scope where an interface could disappear. The old input_dev field was incorrectly made dependant on CONFIG_NET_CLS_ACT in skb_copy(). The ifb device is fixed to set skb->dev in a manner that the device can't disappear before calling netif_rx() and the semantics are fixed so a packet reentering the stack looks like it would have been received on the ifb device. Signed-off-by: Thomas Graf <[EMAIL PROTECTED]> Index: net-2.6.git/include/linux/skbuff.h =================================================================== --- net-2.6.git.orig/include/linux/skbuff.h +++ net-2.6.git/include/linux/skbuff.h @@ -181,7 +181,6 @@ enum { * @sk: Socket we are owned by * @tstamp: Time we arrived * @dev: Device we arrived on/are leaving by - * @input_dev: Device we arrived on * @h: Transport layer header * @nh: Network layer header * @mac: Link layer header @@ -192,6 +191,7 @@ enum { * @data_len: Data length * @mac_len: Length of link layer header * @csum: Checksum + * @iif: Device we arrived on * @local_df: allow local fragmentation * @cloned: Head may be cloned (check refcnt to be sure) * @nohdr: Payload reference only, must not modify header @@ -228,7 +228,6 @@ struct sk_buff { struct sock *sk; struct skb_timeval tstamp; struct net_device *dev; - struct net_device *input_dev; union { struct tcphdr *th; @@ -266,6 +265,7 @@ struct sk_buff { data_len, mac_len, csum; + int iif; __u32 priority; __u8 local_df:1, cloned:1, Index: net-2.6.git/include/net/pkt_cls.h =================================================================== --- net-2.6.git.orig/include/net/pkt_cls.h +++ net-2.6.git/include/net/pkt_cls.h @@ -352,14 +352,19 @@ tcf_change_indev(struct tcf_proto *tp, c static inline int tcf_match_indev(struct sk_buff *skb, char *indev) { + int ret = 1; + if (indev[0]) { - if (!skb->input_dev) - return 0; - if (strcmp(indev, skb->input_dev->name)) + struct net_device *dev; + + dev = dev_get_by_index(skb->iif); + if (!dev) return 0; + ret = !strcmp(indev, dev->name); + dev_put(dev); } - return 1; + return ret; } #endif /* CONFIG_NET_CLS_IND */ Index: net-2.6.git/net/core/dev.c =================================================================== --- net-2.6.git.orig/net/core/dev.c +++ net-2.6.git/net/core/dev.c @@ -1715,8 +1715,8 @@ static int ing_filter(struct sk_buff *sk if (dev->qdisc_ingress) { __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); if (MAX_RED_LOOP < ttl++) { - printk("Redir loop detected Dropping packet (%s->%s)\n", - skb->input_dev->name, skb->dev->name); + printk("Redir loop detected Dropping packet (%d->%s)\n", + skb->iif, skb->dev->name); return TC_ACT_SHOT; } @@ -1749,8 +1749,8 @@ int netif_receive_skb(struct sk_buff *sk if (!skb->tstamp.off_sec) net_timestamp(skb); - if (!skb->input_dev) - skb->input_dev = skb->dev; + if (!skb->iif) + skb->iif = skb->dev->ifindex; orig_dev = skb_bond(skb); Index: net-2.6.git/net/core/skbuff.c =================================================================== --- net-2.6.git.orig/net/core/skbuff.c +++ net-2.6.git/net/core/skbuff.c @@ -463,10 +463,10 @@ struct sk_buff *skb_clone(struct sk_buff n->tc_verd = SET_TC_VERD(skb->tc_verd,0); n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); n->tc_verd = CLR_TC_MUNGED(n->tc_verd); - C(input_dev); #endif skb_copy_secmark(n, skb); #endif + C(iif); C(truesize); atomic_set(&n->users, 1); C(head); Index: net-2.6.git/net/sched/act_api.c =================================================================== --- net-2.6.git.orig/net/sched/act_api.c +++ net-2.6.git/net/sched/act_api.c @@ -156,9 +156,8 @@ int tcf_action_exec(struct sk_buff *skb, if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); - D2PRINTK("(%p)tcf_action_exec: cleared TC_NCLS in %s out %s\n", - skb, skb->input_dev ? skb->input_dev->name : "xxx", - skb->dev->name); + D2PRINTK("(%p)tcf_action_exec: cleared TC_NCLS in %d out %s\n", + skb, skb->iif, skb->dev->name); ret = TC_ACT_OK; goto exec_done; } Index: net-2.6.git/net/sched/act_mirred.c =================================================================== --- net-2.6.git.orig/net/sched/act_mirred.c +++ net-2.6.git/net/sched/act_mirred.c @@ -207,7 +207,7 @@ bad_mirred: skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); skb2->dev = dev; - skb2->input_dev = skb->dev; + skb2->iif = skb->dev->ifindex; dev_queue_xmit(skb2); spin_unlock(&p->lock); return p->action; Index: net-2.6.git/drivers/net/ifb.c =================================================================== --- net-2.6.git.orig/drivers/net/ifb.c +++ net-2.6.git/drivers/net/ifb.c @@ -98,13 +98,41 @@ static void ri_tasklet(unsigned long dev stats->tx_packets++; stats->tx_bytes +=skb->len; if (from & AT_EGRESS) { + /* + * Skb was given to us at egress, direct it back + * to where it came from [iif] but update iif to + * signal its new origin. + */ + struct net_device *iif; + + iif = __dev_get_by_index(skb->iif); + if (!iif) + goto drop; + dp->st_rx_frm_egr++; + + /* Already holding a reference on iif netdevice. */ + skb->dev = iif; + skb->iif = _dev->ifindex; dev_queue_xmit(skb); } else if (from & AT_INGRESS) { - + /* + * Skb was given to us at ingress, reinject into + * stack as if it would have been received on + * this device. + */ dp->st_rx_frm_ing++; + + /* + * Disable preempt until holding new reference on + * skb->dev in netif_rx(). + */ + preempt_disable(); + skb->dev = _dev; netif_rx(skb); + preempt_enable(); } else { +drop: dev_kfree_skb(skb); stats->tx_dropped++; } @@ -158,7 +186,7 @@ static int ifb_xmit(struct sk_buff *skb, stats->tx_packets++; stats->tx_bytes+=skb->len; - if (!from || !skb->input_dev) { + if (!from || !skb->iif) { dropped: dev_kfree_skb(skb); stats->rx_dropped++; @@ -169,8 +197,6 @@ dropped: * ingress -> egress or * egress -> ingress */ - skb->dev = skb->input_dev; - skb->input_dev = dev; if (from & AT_INGRESS) { skb_pull(skb, skb->dev->hard_header_len); } else { - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html