On Wed, Oct 15, 2014 at 06:25:25PM +0800, Jason Wang wrote:
> On 10/15/2014 06:18 PM, Michael S. Tsirkin wrote:
> > On Wed, Oct 15, 2014 at 03:25:29PM +0800, Jason Wang wrote:
> >> > Orphan skb in ndo_start_xmit() breaks socket accounting and packet
> >> > queuing. This in fact breaks lots of things such as pktgen and several
> >> > TCP optimizations. It also makes it impossible to implement BQL for
> >> > virtio-net.
> >> > 
> >> > This patch tries to solve this issue by enabling tx interrupt. To
> >> > avoid introducing extra spinlocks, a tx napi was scheduled to free
> >> > those packets.
> >> > 
> >> > More tx interrupt mitigation method could be used on top.
> >> > 
> >> > Cc: Rusty Russell <ru...@rustcorp.com.au>
> >> > Cc: Michael S. Tsirkin <m...@redhat.com>
> >> > Signed-off-by: Jason Wang <jasow...@redhat.com>
> >> > ---
> >> >  drivers/net/virtio_net.c |  125 
> >> > +++++++++++++++++++++++++++++++---------------
> >> >  1 files changed, 85 insertions(+), 40 deletions(-)
> >> > 
> >> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> >> > index ccf98f9..2afc2e2 100644
> >> > --- a/drivers/net/virtio_net.c
> >> > +++ b/drivers/net/virtio_net.c
> >> > @@ -72,6 +72,8 @@ struct send_queue {
> >> >  
> >> >          /* Name of the send queue: output.$index */
> >> >          char name[40];
> >> > +
> >> > +        struct napi_struct napi;
> >> >  };
> >> >  
> >> >  /* Internal representation of a receive virtqueue */
> >> > @@ -217,15 +219,40 @@ static struct page *get_a_page(struct 
> >> > receive_queue *rq, gfp_t gfp_mask)
> >> >          return p;
> >> >  }
> >> >  
> >> > +static int free_old_xmit_skbs(struct send_queue *sq, int budget)
> >> > +{
> >> > +        struct sk_buff *skb;
> >> > +        unsigned int len;
> >> > +        struct virtnet_info *vi = sq->vq->vdev->priv;
> >> > +        struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
> >> > +        u64 tx_bytes = 0, tx_packets = 0;
> >> > +
> >> > +        while (tx_packets < budget &&
> >> > +               (skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> >> > +                pr_debug("Sent skb %p\n", skb);
> >> > +
> >> > +                tx_bytes += skb->len;
> >> > +                tx_packets++;
> >> > +
> >> > +                dev_kfree_skb_any(skb);
> >> > +        }
> >> > +
> >> > +        u64_stats_update_begin(&stats->tx_syncp);
> >> > +        stats->tx_bytes += tx_bytes;
> >> > +        stats->tx_packets =+ tx_packets;
> >> > +        u64_stats_update_end(&stats->tx_syncp);
> >> > +
> >> > +        return tx_packets;
> >> > +}
> >> > +
> >> >  static void skb_xmit_done(struct virtqueue *vq)
> >> >  {
> >> >          struct virtnet_info *vi = vq->vdev->priv;
> >> > +        struct send_queue *sq = &vi->sq[vq2txq(vq)];
> >> >  
> >> > -        /* Suppress further interrupts. */
> >> > -        virtqueue_disable_cb(vq);
> >> > -
> >> > -        /* We were probably waiting for more output buffers. */
> >> > -        netif_wake_subqueue(vi->dev, vq2txq(vq));
> >> > +        if (napi_schedule_prep(&sq->napi)) {
> >> > +                __napi_schedule(&sq->napi);
> >> > +        }
> >> >  }
> >> >  
> >> >  static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx)
> >> > @@ -774,7 +801,39 @@ again:
> >> >          return received;
> >> >  }
> >> >  
> >> > +static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> >> > +{
> >> > +        struct send_queue *sq =
> >> > +                container_of(napi, struct send_queue, napi);
> >> > +        struct virtnet_info *vi = sq->vq->vdev->priv;
> >> > +        struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, 
> >> > vq2txq(sq->vq));
> >> > +        unsigned int r, sent = 0;
> >> > +
> >> > +again:
> >> > +        __netif_tx_lock(txq, smp_processor_id());
> >> > +        virtqueue_disable_cb(sq->vq);
> >> > +        sent += free_old_xmit_skbs(sq, budget - sent);
> >> > +
> >> > +        if (sent < budget) {
> >> > +                r = virtqueue_enable_cb_prepare(sq->vq);
> >> > +                napi_complete(napi);
> >> > +                __netif_tx_unlock(txq);
> >> > +                if (unlikely(virtqueue_poll(sq->vq, r)) &&
> > So you are enabling callback on the next packet,
> > which is almost sure to cause an interrupt storm
> > on the guest.
> >
> >
> > I think it's a bad idea, this is why I used
> > enable_cb_delayed in my patch.
> 
> Right, will do this, but we may also need to make sure the used event
> never goes backward, since we may call virtqueue_enable_cb_avail().

That's why my patch always calls virtqueue_enable_cb_delayed.
So no need for hacks.

Maybe you can review my patch and comment?




> >
> >
> >> > +                    napi_schedule_prep(napi)) {
> >> > +                        virtqueue_disable_cb(sq->vq);
> >> > +                        __napi_schedule(napi);
> >> > +                        goto again;
> >> > +                }
> >> > +        } else {
> >> > +                __netif_tx_unlock(txq);
> >> > +        }
> >> > +
> >> > +        netif_wake_subqueue(vi->dev, vq2txq(sq->vq));
> >> > +        return sent;
> >> > +}
> >> > +
> >> >  #ifdef CONFIG_NET_RX_BUSY_POLL
> >> > +
> >> >  /* must be called with local_bh_disable()d */
> >> >  static int virtnet_busy_poll(struct napi_struct *napi)
> >> >  {
> >> > @@ -822,36 +881,12 @@ static int virtnet_open(struct net_device *dev)
> >> >                          if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
> >> >                                  schedule_delayed_work(&vi->refill, 0);
> >> >                  virtnet_napi_enable(&vi->rq[i]);
> >> > +                napi_enable(&vi->sq[i].napi);
> >> >          }
> >> >  
> >> >          return 0;
> >> >  }
> >> >  
> >> > -static int free_old_xmit_skbs(struct send_queue *sq)
> >> > -{
> >> > -        struct sk_buff *skb;
> >> > -        unsigned int len;
> >> > -        struct virtnet_info *vi = sq->vq->vdev->priv;
> >> > -        struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
> >> > -        u64 tx_bytes = 0, tx_packets = 0;
> >> > -
> >> > -        while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> >> > -                pr_debug("Sent skb %p\n", skb);
> >> > -
> >> > -                tx_bytes += skb->len;
> >> > -                tx_packets++;
> >> > -
> >> > -                dev_kfree_skb_any(skb);
> >> > -        }
> >> > -
> >> > -        u64_stats_update_begin(&stats->tx_syncp);
> >> > -        stats->tx_bytes += tx_bytes;
> >> > -        stats->tx_packets =+ tx_packets;
> >> > -        u64_stats_update_end(&stats->tx_syncp);
> >> > -
> >> > -        return tx_packets;
> >> > -}
> >> > -
> > So you end up moving it all anyway, why bother splitting out
> > minor changes in previous patches?
> 
> To make review easier, but if you think this in fact complicates things,
> I will pack them into a single patch.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to