* Kok, Auke <[EMAIL PROTECTED]> 2007-02-08 16:09 > diff --git a/net/core/dev.c b/net/core/dev.c > index 455d589..42b635c 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -1477,6 +1477,49 @@ gso: > skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); > #endif > if (q->enqueue) { > +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE > + int queue_index; > + /* If we're a multi-queue device, get a queue index to lock */ > + if (netif_is_multiqueue(dev)) > + { > + /* Get the queue index and lock it. */ > + if (likely(q->ops->map_queue)) { > + queue_index = q->ops->map_queue(skb, q); > + > spin_lock(&dev->egress_subqueue[queue_index].queue_lock); > + rc = q->enqueue(skb, q); > + /* > + * Unlock because the underlying qdisc > + * may queue and send a packet from a > + * different queue. > + */ > + > spin_unlock(&dev->egress_subqueue[queue_index].queue_lock); > + qdisc_run(dev);
I really dislike this asymmetric locking logic, while enqueue() is called with queue_lock held dequeue() is repsonsible to acquire the lock for qdisc_restart(). > + rc = rc == NET_XMIT_BYPASS > + ? NET_XMIT_SUCCESS : rc; > + goto out; > + } else { > + printk(KERN_CRIT "Device %s is " > + "multiqueue, but map_queue is " > + "undefined in the qdisc!!\n", > + dev->name); > + kfree_skb(skb); Move this check to tc_modify_qdisc(), it's useless to check this for every packet being delivered. > + } > + } else { > + /* We're not a multi-queue device. */ > + spin_lock(&dev->queue_lock); > + q = dev->qdisc; > + if (q->enqueue) { > + rc = q->enqueue(skb, q); > + qdisc_run(dev); > + spin_unlock(&dev->queue_lock); > + rc = rc == NET_XMIT_BYPASS > + ? NET_XMIT_SUCCESS : rc; > + goto out; > + } > + spin_unlock(&dev->queue_lock); Please don't duplicate already existing code. > @@ -130,6 +140,72 @@ static inline int qdisc_restart(struct net_device *dev) > } > > { > +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE > + if (netif_is_multiqueue(dev)) { > + if (likely(q->ops->map_queue)) { > + queue_index = q->ops->map_queue(skb, q); > + } else { > + printk(KERN_CRIT "Device %s is " > + "multiqueue, but map_queue is " > + "undefined in the qdisc!!\n", > + dev->name); > + goto requeue; > + } Yet another condition completely useless for every transmission. > + > spin_unlock(&dev->egress_subqueue[queue_index].queue_lock); > + /* Check top level device, and any sub-device */ > + if ((!netif_queue_stopped(dev)) && > + (!netif_subqueue_stopped(dev, queue_index))) { > + int ret; > + ret = > dev->hard_start_subqueue_xmit(skb, dev, queue_index); > + if (ret == NETDEV_TX_OK) { > + if (!nolock) { > + netif_tx_unlock(dev); > + } > + return -1; > + } > + if (ret == NETDEV_TX_LOCKED && nolock) { > + > spin_lock(&dev->egress_subqueue[queue_index].queue_lock); > + goto collision; > + } > + } > + /* NETDEV_TX_BUSY - we need to requeue */ > + /* Release the driver */ > + if (!nolock) { > + netif_tx_unlock(dev); > + } > + > spin_lock(&dev->egress_subqueue[queue_index].queue_lock); > + q = dev->qdisc; This is identical to the existing logic except for the different lock, the additional to check subqueue state and a different hard_start_xmit call. Share the logic. > + } > + else > + { > + /* We're a single-queue device */ > + /* And release queue */ > + spin_unlock(&dev->queue_lock); > + if (!netif_queue_stopped(dev)) { > + int ret; > + > + ret = dev->hard_start_xmit(skb, dev); > + if (ret == NETDEV_TX_OK) { > + if (!nolock) { > + netif_tx_unlock(dev); > + } > + spin_lock(&dev->queue_lock); > + return -1; > + } > + if (ret == NETDEV_TX_LOCKED && nolock) { > + spin_lock(&dev->queue_lock); > + goto collision; > + } > + } > + /* NETDEV_TX_BUSY - we need to requeue */ > + /* Release the driver */ > + if (!nolock) { > + netif_tx_unlock(dev); > + } > + spin_lock(&dev->queue_lock); > + q = dev->qdisc; Again, you just copied the existing code into a separate branch, fix the branching logic! > @@ -356,10 +454,18 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* > qdisc) > struct sk_buff_head *list = qdisc_priv(qdisc); > > for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) { > +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE > + if (netif_is_multiqueue(qdisc->dev)) > + spin_lock(&qdisc->dev->egress_subqueue[0].queue_lock); > +#endif > if (!skb_queue_empty(list + prio)) { > qdisc->q.qlen--; > return __qdisc_dequeue_head(qdisc, list + prio); > } > +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE > + if (netif_is_multiqueue(qdisc->dev)) > + spin_unlock(&qdisc->dev->egress_subqueue[0].queue_lock); > +#endif This lock/unlock for every band definitely screws performance. > } > > return NULL; > @@ -141,18 +174,53 @@ prio_dequeue(struct Qdisc* sch) > struct sk_buff *skb; > struct prio_sched_data *q = qdisc_priv(sch); > int prio; > +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE > + int queue; > +#endif > struct Qdisc *qdisc; > > + /* > + * If we're multiqueue, the basic approach is try the lock on each > + * queue. If it's locked, either we're already dequeuing, or enqueue > + * is doing something. Go to the next band if we're locked. Once we > + * have a packet, unlock the queue. NOTE: the underlying qdisc CANNOT > + * be a PRIO qdisc, otherwise we will deadlock. FIXME > + */ > for (prio = 0; prio < q->bands; prio++) { > +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE > + if (netif_is_multiqueue(sch->dev)) { > + queue = q->band2queue[prio]; > + if > (spin_trylock(&sch->dev->egress_subqueue[queue].queue_lock)) { > + qdisc = q->queues[prio]; > + skb = qdisc->dequeue(qdisc); > + if (skb) { > + sch->q.qlen--; > + skb->priority = prio; > + > spin_unlock(&sch->dev->egress_subqueue[queue].queue_lock); > + return skb; > + } > + > spin_unlock(&sch->dev->egress_subqueue[queue].queue_lock); > + } Your modified qdisc_restart() expects the queue_lock to be locked, how can this work? > + } else { > + qdisc = q->queues[prio]; > + skb = qdisc->dequeue(qdisc); > + if (skb) { > + sch->q.qlen--; > + skb->priority = prio; > + return skb; > + } > + } > +#else > qdisc = q->queues[prio]; > skb = qdisc->dequeue(qdisc); > if (skb) { > sch->q.qlen--; > + skb->priority = prio; > return skb; > } > +#endif > } > return NULL; > - > } > > static unsigned int prio_drop(struct Qdisc* sch) - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/