IPoIB: implement the new batching API.

Signed-off-by: Krishna Kumar <[EMAIL PROTECTED]>
---
 ipoib_main.c |  189 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 184 insertions(+), 5 deletions(-)
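Note for reviewers (illustrative, not part of the diff): this hook only does something useful together with the core batching patches earlier in this series, which add the skb_blist list and the hard_start_xmit_batch entry point to struct net_device. The core is expected to queue several skbs on dev->skb_blist and then make a single call into the driver, roughly along the lines of the sketch below; the wrapper name dev_batch_xmit() and its call site are assumptions made up for illustration, not the actual core patch.

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/*
 * Illustrative sketch only: how a batching-aware queue-run path might
 * hand a whole list of skbs to a driver in one call.  skb_blist and
 * hard_start_xmit_batch come from the core patches of this series;
 * the wrapper itself is hypothetical.
 */
static int dev_batch_xmit(struct net_device *dev)
{
	int ret = NETDEV_TX_OK;

	/* Let the driver drain as much of dev->skb_blist as its TX ring allows */
	if (dev->hard_start_xmit_batch && !skb_queue_empty(dev->skb_blist))
		ret = dev->hard_start_xmit_batch(dev);

	/*
	 * NETDEV_TX_BUSY or NETDEV_TX_LOCKED leaves the unsent skbs on
	 * the list so the next queue run can retry them.
	 */
	return ret;
}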
diff -ruNp ORG/drivers/infiniband/ulp/ipoib/ipoib_main.c NEW/drivers/infiniband/ulp/ipoib/ipoib_main.c
--- ORG/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-07-12 08:55:06.000000000 +0530
+++ NEW/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-08-07 13:11:19.000000000 +0530
@@ -558,7 +558,8 @@ static void neigh_add_path(struct sk_buf
 				goto err_drop;
 			}
 		} else
-			ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+			ipoib_send(dev, skb, path->ah,
+				   IPOIB_QPN(skb->dst->neighbour->ha), 1);
 	} else {
 		neigh->ah = NULL;
@@ -638,7 +639,7 @@ static void unicast_arp_send(struct sk_b
 		ipoib_dbg(priv, "Send unicast ARP to %04x\n",
 			  be16_to_cpu(path->pathrec.dlid));
 
-		ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+		ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr), 1);
 	} else if ((path->query || !path_rec_start(dev, path)) &&
 		   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
 		/* put pseudoheader back on for next time */
@@ -704,7 +705,8 @@ static int ipoib_start_xmit(struct sk_bu
 				goto out;
 			}
 
-			ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+			ipoib_send(dev, skb, neigh->ah,
+				   IPOIB_QPN(skb->dst->neighbour->ha), 1);
 			goto out;
 		}
 
@@ -753,6 +755,153 @@ out:
 	return NETDEV_TX_OK;
 }
 
+#define XMIT_QUEUED_SKBS()						\
+	do {								\
+		if (wr_num) {						\
+			ipoib_send(dev, NULL, old_neigh->ah, old_qpn,	\
+				   wr_num);				\
+			wr_num = 0;					\
+		}							\
+	} while (0)
+
+/*
+ * TODO: Merge with ipoib_start_xmit to use the same code and have a
+ * transparent wrapper caller to xmit's, etc. Status: Done, needs testing.
+ */
+static int ipoib_start_xmit_frames(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct sk_buff *skb;
+	struct sk_buff_head *blist = dev->skb_blist;
+	int max_skbs, wr_num = 0;
+	u32 qpn, old_qpn = 0;
+	struct ipoib_neigh *neigh, *old_neigh = NULL;
+	unsigned long flags;
+
+	if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
+		return NETDEV_TX_LOCKED;
+
+	/*
+	 * Figure out how many skbs can be sent. This prevents the device
+	 * getting full and avoids checking for queue stopped after each
+	 * iteration.
+	 */
+	max_skbs = ipoib_sendq_size - (priv->tx_head - priv->tx_tail);
+	while (max_skbs-- > 0 && (skb = __skb_dequeue(blist)) != NULL) {
+		if (likely(skb->dst && skb->dst->neighbour)) {
+			if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
+				XMIT_QUEUED_SKBS();
+				ipoib_path_lookup(skb, dev);
+				continue;
+			}
+
+			neigh = *to_ipoib_neigh(skb->dst->neighbour);
+
+			if (ipoib_cm_get(neigh)) {
+				if (ipoib_cm_up(neigh)) {
+					XMIT_QUEUED_SKBS();
+					ipoib_cm_send(dev, skb,
+						      ipoib_cm_get(neigh));
+					continue;
+				}
+			} else if (neigh->ah) {
+				if (unlikely(memcmp(&neigh->dgid.raw,
+						    skb->dst->neighbour->ha + 4,
+						    sizeof(union ib_gid)))) {
+					spin_lock(&priv->lock);
+					/*
+					 * It's safe to call ipoib_put_ah()
+					 * inside priv->lock here, because we
+					 * know that path->ah will always hold
+					 * one more reference, so ipoib_put_ah()
+					 * will never do more than decrement
+					 * the ref count.
+					 */
+					ipoib_put_ah(neigh->ah);
+					list_del(&neigh->list);
+					ipoib_neigh_free(dev, neigh);
+					spin_unlock(&priv->lock);
+					XMIT_QUEUED_SKBS();
+					ipoib_path_lookup(skb, dev);
+					continue;
+				}
+
+				qpn = IPOIB_QPN(skb->dst->neighbour->ha);
+				if (neigh != old_neigh || qpn != old_qpn) {
+					/*
+					 * Sending to a different destination
+					 * from earlier skb's - send all
+					 * existing skbs (if any), and restart.
+					 */
+					XMIT_QUEUED_SKBS();
+					old_neigh = neigh;
+					old_qpn = qpn;
+				}
+
+				if (likely(!ipoib_process_skb(dev, skb, priv,
+							      neigh->ah, qpn,
+							      wr_num)))
+					wr_num++;
+
+				continue;
+			}
+
+			if (skb_queue_len(&neigh->queue) <
+					IPOIB_MAX_PATH_REC_QUEUE) {
+				spin_lock(&priv->lock);
+				__skb_queue_tail(&neigh->queue, skb);
+				spin_unlock(&priv->lock);
+			} else {
+				dev_kfree_skb_any(skb);
+				++priv->stats.tx_dropped;
+				++max_skbs;
+			}
+		} else {
+			struct ipoib_pseudoheader *phdr =
+				(struct ipoib_pseudoheader *) skb->data;
+			skb_pull(skb, sizeof *phdr);
+
+			if (phdr->hwaddr[4] == 0xff) {
+				/* Add in the P_Key for multicast*/
+				phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
+				phdr->hwaddr[9] = priv->pkey & 0xff;
+
+				XMIT_QUEUED_SKBS();
+				ipoib_mcast_send(dev, phdr->hwaddr + 4, skb);
+			} else {
+				/* unicast GID -- should be ARP or RARP reply */
+
+				if ((be16_to_cpup((__be16 *) skb->data) !=
+				     ETH_P_ARP) &&
+				    (be16_to_cpup((__be16 *) skb->data) !=
+				     ETH_P_RARP)) {
+					ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x "
+						   IPOIB_GID_FMT "\n",
+						   skb->dst ? "neigh" : "dst",
+						   be16_to_cpup((__be16 *)
+								skb->data),
+						   IPOIB_QPN(phdr->hwaddr),
+						   IPOIB_GID_RAW_ARG(phdr->hwaddr
+								     + 4));
+					dev_kfree_skb_any(skb);
+					++priv->stats.tx_dropped;
+					++max_skbs;
+					continue;
+				}
+				XMIT_QUEUED_SKBS();
+				unicast_arp_send(skb, dev, phdr);
+			}
+		}
+	}
+
+	/* Send out last packets (if any) */
+	XMIT_QUEUED_SKBS();
+
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+	return skb_queue_empty(blist) ? NETDEV_TX_OK : NETDEV_TX_BUSY;
+}
+
 static struct net_device_stats *ipoib_get_stats(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -896,13 +1045,37 @@ int ipoib_dev_init(struct net_device *de
 		goto out_rx_ring_cleanup;
 	}
 
-	/* priv->tx_head & tx_tail are already 0 */
+	/* priv->tx_head & tx_tail & tx_priv_tail are already 0 */
 
-	if (ipoib_ib_dev_init(dev, ca, port))
+	/* Allocate tx_sge */
+	priv->tx_sge = kmalloc(ipoib_sendq_size * sizeof *priv->tx_sge,
+			       GFP_KERNEL);
+	if (!priv->tx_sge) {
+		printk(KERN_WARNING "%s: failed to allocate TX sge (%d entries)\n",
+		       ca->name, ipoib_sendq_size);
 		goto out_tx_ring_cleanup;
+	}
+
+	/* Allocate tx_wr */
+	priv->tx_wr = kmalloc(ipoib_sendq_size * sizeof *priv->tx_wr,
+			      GFP_KERNEL);
+	if (!priv->tx_wr) {
+		printk(KERN_WARNING "%s: failed to allocate TX wr (%d entries)\n",
+		       ca->name, ipoib_sendq_size);
+		goto out_tx_sge_cleanup;
+	}
+
+	if (ipoib_ib_dev_init(dev, ca, port))
+		goto out_tx_wr_cleanup;
 
 	return 0;
 
+out_tx_wr_cleanup:
+	kfree(priv->tx_wr);
+
+out_tx_sge_cleanup:
+	kfree(priv->tx_sge);
+
 out_tx_ring_cleanup:
 	kfree(priv->tx_ring);
@@ -930,9 +1103,13 @@ void ipoib_dev_cleanup(struct net_device
 	kfree(priv->rx_ring);
 	kfree(priv->tx_ring);
+	kfree(priv->tx_sge);
+	kfree(priv->tx_wr);
 
 	priv->rx_ring = NULL;
 	priv->tx_ring = NULL;
+	priv->tx_sge = NULL;
+	priv->tx_wr = NULL;
 }
@@ -943,6 +1120,7 @@ static void ipoib_setup(struct net_devic
 	dev->stop		 = ipoib_stop;
 	dev->change_mtu		 = ipoib_change_mtu;
 	dev->hard_start_xmit	 = ipoib_start_xmit;
+	dev->hard_start_xmit_batch = ipoib_start_xmit_frames;
 	dev->get_stats		 = ipoib_get_stats;
 	dev->tx_timeout		 = ipoib_timeout;
 	dev->hard_header	 = ipoib_hard_header;
@@ -979,6 +1157,7 @@ static void ipoib_setup(struct net_devic
 	spin_lock_init(&priv->lock);
 	spin_lock_init(&priv->tx_lock);
+	spin_lock_init(&priv->comp_lock);
 
 	mutex_init(&priv->mcast_mutex);
 	mutex_init(&priv->vlan_mutex);
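The batching path above relies on ipoib_process_skb(), which is added by the ipoib_ib.c/ipoib.h part of this series and is not visible in this file. For readers following only this patch, a rough guess at its shape is sketched below: it maps the skb and fills slot wr_num of the preallocated tx_sge[]/tx_wr[] arrays so that a later ipoib_send(dev, NULL, ah, qpn, wr_num) can post all queued work requests to the same address handle in one shot (presumably by chaining tx_wr[0..wr_num-1] through their next pointers before a single post). Every field and flag choice below is an assumption based on this patch and the existing single-skb send path, not the real helper.

#include <rdma/ib_verbs.h>
#include "ipoib.h"	/* struct ipoib_dev_priv, struct ipoib_ah, ipoib_sendq_size */

/* Hypothetical sketch -- the real helper ships in another patch of the series */
static int ipoib_process_skb(struct net_device *dev, struct sk_buff *skb,
			     struct ipoib_dev_priv *priv, struct ipoib_ah *ah,
			     u32 qpn, int wr_num)
{
	unsigned int tx_index = priv->tx_head & (ipoib_sendq_size - 1);
	u64 addr;

	/* Map the payload for the HCA, as the single-skb path does */
	addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE);
	if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
		++priv->stats.tx_dropped;
		dev_kfree_skb_any(skb);
		return -EIO;
	}

	priv->tx_ring[tx_index].skb     = skb;
	priv->tx_ring[tx_index].mapping = addr;

	/* Fill slot wr_num; ipoib_send() later posts slots 0..wr_num-1 at once */
	priv->tx_sge[wr_num].addr   = addr;
	priv->tx_sge[wr_num].length = skb->len;
	priv->tx_sge[wr_num].lkey   = priv->mr->lkey;

	priv->tx_wr[wr_num].wr_id             = tx_index;
	priv->tx_wr[wr_num].sg_list           = &priv->tx_sge[wr_num];
	priv->tx_wr[wr_num].num_sge           = 1;
	priv->tx_wr[wr_num].opcode            = IB_WR_SEND;
	priv->tx_wr[wr_num].send_flags        = IB_SEND_SIGNALED;
	priv->tx_wr[wr_num].wr.ud.ah          = ah->ah;
	priv->tx_wr[wr_num].wr.ud.remote_qpn  = qpn;
	priv->tx_wr[wr_num].wr.ud.remote_qkey = priv->qkey;

	++priv->tx_head;
	return 0;
}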