Re: [PATCH 1/2] NET: Multiple queue network device support

Thomas Graf Fri, 09 Mar 2007 05:40:37 -0800

* Kok, Auke <[EMAIL PROTECTED]> 2007-02-08 16:09
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 455d589..42b635c 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -1477,6 +1477,49 @@ gso:
>       skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
>  #endif
>       if (q->enqueue) {
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> +             int queue_index;
> +             /* If we're a multi-queue device, get a queue index to lock */
> +             if (netif_is_multiqueue(dev))
> +             {
> +                     /* Get the queue index and lock it. */
> +                     if (likely(q->ops->map_queue)) {
> +                             queue_index = q->ops->map_queue(skb, q);
> +                             
> spin_lock(&dev->egress_subqueue[queue_index].queue_lock);
> +                             rc = q->enqueue(skb, q);
> +                             /*
> +                              * Unlock because the underlying qdisc
> +                              * may queue and send a packet from a
> +                              * different queue.
> +                              */
> +                             
> spin_unlock(&dev->egress_subqueue[queue_index].queue_lock);
> +                             qdisc_run(dev);


I really dislike this asymmetric locking logic, while enqueue() is called
with queue_lock held dequeue() is repsonsible to acquire the lock for
qdisc_restart().

> +                             rc = rc == NET_XMIT_BYPASS
> +                                        ? NET_XMIT_SUCCESS : rc;
> +                             goto out;
> +                     } else {
> +                             printk(KERN_CRIT "Device %s is "
> +                                     "multiqueue, but map_queue is "
> +                                     "undefined in the qdisc!!\n",
> +                                     dev->name);
> +                             kfree_skb(skb);

Move this check to tc_modify_qdisc(), it's useless to check this for
every packet being delivered.

> +                     }
> +             } else {
> +                     /* We're not a multi-queue device. */
> +                     spin_lock(&dev->queue_lock);
> +                     q = dev->qdisc;
> +                     if (q->enqueue) {
> +                             rc = q->enqueue(skb, q);
> +                             qdisc_run(dev);
> +                             spin_unlock(&dev->queue_lock);
> +                             rc = rc == NET_XMIT_BYPASS
> +                                        ? NET_XMIT_SUCCESS : rc;
> +                             goto out;
> +                     }
> +                     spin_unlock(&dev->queue_lock);

Please don't duplicate already existing code.

> @@ -130,6 +140,72 @@ static inline int qdisc_restart(struct net_device *dev)
>               }
>               
>               {
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> +                     if (netif_is_multiqueue(dev)) {
> +                             if (likely(q->ops->map_queue)) {
> +                                     queue_index = q->ops->map_queue(skb, q);
> +                             } else {
> +                                     printk(KERN_CRIT "Device %s is "
> +                                             "multiqueue, but map_queue is "
> +                                             "undefined in the qdisc!!\n",
> +                                             dev->name);
> +                                     goto requeue;
> +                             }

Yet another condition completely useless for every transmission.

> +                             
> spin_unlock(&dev->egress_subqueue[queue_index].queue_lock);
> +                             /* Check top level device, and any sub-device */
> +                             if ((!netif_queue_stopped(dev)) &&
> +                               (!netif_subqueue_stopped(dev, queue_index))) {
> +                                     int ret;
> +                                     ret = 
> dev->hard_start_subqueue_xmit(skb, dev, queue_index);
> +                                     if (ret == NETDEV_TX_OK) {
> +                                             if (!nolock) {
> +                                                     netif_tx_unlock(dev);
> +                                             }
> +                                             return -1;
> +                                     }
> +                                     if (ret == NETDEV_TX_LOCKED && nolock) {
> +                                             
> spin_lock(&dev->egress_subqueue[queue_index].queue_lock);
> +                                             goto collision;
> +                                     }
> +                             }
> +                             /* NETDEV_TX_BUSY - we need to requeue */
> +                             /* Release the driver */
> +                             if (!nolock) {
> +                                     netif_tx_unlock(dev);
> +                             }
> +                             
> spin_lock(&dev->egress_subqueue[queue_index].queue_lock);
> +                             q = dev->qdisc;

This is identical to the existing logic except for the different lock,
the additional to check subqueue state and a different hard_start_xmit
call. Share the logic.

> +                     }
> +                     else
> +                     {
> +                             /* We're a single-queue device */
> +                             /* And release queue */
> +                             spin_unlock(&dev->queue_lock);
> +                             if (!netif_queue_stopped(dev)) {
> +                                     int ret;
> +
> +                                     ret = dev->hard_start_xmit(skb, dev);
> +                                     if (ret == NETDEV_TX_OK) {
> +                                             if (!nolock) {
> +                                                     netif_tx_unlock(dev);
> +                                             }
> +                                             spin_lock(&dev->queue_lock);
> +                                             return -1;
> +                                     }
> +                                     if (ret == NETDEV_TX_LOCKED && nolock) {
> +                                             spin_lock(&dev->queue_lock);
> +                                             goto collision;
> +                                     }
> +                             }
> +                             /* NETDEV_TX_BUSY - we need to requeue */
> +                             /* Release the driver */
> +                             if (!nolock) {
> +                                     netif_tx_unlock(dev);
> +                             }
> +                             spin_lock(&dev->queue_lock);
> +                             q = dev->qdisc;

Again, you just copied the existing code into a separate branch, fix the
branching logic!

> @@ -356,10 +454,18 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* 
> qdisc)
>       struct sk_buff_head *list = qdisc_priv(qdisc);
>  
>       for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> +             if (netif_is_multiqueue(qdisc->dev))
> +                     spin_lock(&qdisc->dev->egress_subqueue[0].queue_lock);
> +#endif
>               if (!skb_queue_empty(list + prio)) {
>                       qdisc->q.qlen--;
>                       return __qdisc_dequeue_head(qdisc, list + prio);
>               }
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> +             if (netif_is_multiqueue(qdisc->dev))
> +                     spin_unlock(&qdisc->dev->egress_subqueue[0].queue_lock);
> +#endif

This lock/unlock for every band definitely screws performance.

>       }
>  
>       return NULL;
> @@ -141,18 +174,53 @@ prio_dequeue(struct Qdisc* sch)
>       struct sk_buff *skb;
>       struct prio_sched_data *q = qdisc_priv(sch);
>       int prio;
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> +     int queue;
> +#endif
>       struct Qdisc *qdisc;
>  
> +     /*
> +      * If we're multiqueue, the basic approach is try the lock on each
> +      * queue.  If it's locked, either we're already dequeuing, or enqueue
> +      * is doing something.  Go to the next band if we're locked.  Once we
> +      * have a packet, unlock the queue.  NOTE: the underlying qdisc CANNOT
> +      * be a PRIO qdisc, otherwise we will deadlock.  FIXME
> +      */
>       for (prio = 0; prio < q->bands; prio++) {
> +#ifdef CONFIG_NET_MULTI_QUEUE_DEVICE
> +             if (netif_is_multiqueue(sch->dev)) {
> +                     queue = q->band2queue[prio];
> +                     if 
> (spin_trylock(&sch->dev->egress_subqueue[queue].queue_lock)) {
> +                             qdisc = q->queues[prio];
> +                             skb = qdisc->dequeue(qdisc);
> +                             if (skb) {
> +                                     sch->q.qlen--;
> +                                     skb->priority = prio;
> +                                     
> spin_unlock(&sch->dev->egress_subqueue[queue].queue_lock);
> +                                     return skb;
> +                             }
> +                             
> spin_unlock(&sch->dev->egress_subqueue[queue].queue_lock);
> +                     }

Your modified qdisc_restart() expects the queue_lock to be locked, how
can this work?

> +             } else {
> +                     qdisc = q->queues[prio];
> +                     skb = qdisc->dequeue(qdisc);
> +                     if (skb) {
> +                             sch->q.qlen--;
> +                             skb->priority = prio;
> +                             return skb;
> +                     }
> +             }
> +#else
>               qdisc = q->queues[prio];
>               skb = qdisc->dequeue(qdisc);
>               if (skb) {
>                       sch->q.qlen--;
> +                     skb->priority = prio;
>                       return skb;
>               }
> +#endif
>       }
>       return NULL;
> -
>  }
>  
>  static unsigned int prio_drop(struct Qdisc* sch)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/2] NET: Multiple queue network device support

Reply via email to