> -----Original Message-----
> From: David Miller [mailto:da...@davemloft.net]
> Sent: Wednesday, May 13, 2015 9:52 PM
> To: Simon Xiao
> Cc: KY Srinivasan; Haiyang Zhang; de...@linuxdriverproject.org;
> net...@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH net-next,1/1] hv_netvsc: use per_cpu stats to calculate
> TX/RX data
> 
> From: six...@microsoft.com
> Date: Tue, 12 May 2015 15:50:02 -0700
> 
> > From: Simon Xiao <six...@microsoft.com>
> >
> > Current code does not lock anything when calculating the TX and RX stats.
> > As a result, the RX and TX data reported by ifconfig are not accurate
> > in a system with high network throughput and multiple CPUs (in my
> > test, RX/TX = 83% between 2 HyperV VM nodes which have 8 vCPUs and 40G
> Ethernet).
> >
> > This patch fixes the above issue by using per_cpu stats.
> > netvsc_get_stats64() summarizes TX and RX data by iterating over all
> > CPUs to get their respective stats.
> >
> > Signed-off-by: Simon Xiao <six...@microsoft.com>
> > Reviewed-by: K. Y. Srinivasan <k...@microsoft.com>
> > Reviewed-by: Haiyang Zhang <haiya...@microsoft.com>
> > ---
> >  drivers/net/hyperv/hyperv_net.h |  9 +++++
> > drivers/net/hyperv/netvsc_drv.c | 80
> > ++++++++++++++++++++++++++++++++++++-----
> >  2 files changed, 81 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/net/hyperv/hyperv_net.h
> > b/drivers/net/hyperv/hyperv_net.h index 41071d3..5a92b36 100644
> > --- a/drivers/net/hyperv/hyperv_net.h
> > +++ b/drivers/net/hyperv/hyperv_net.h
> > @@ -611,6 +611,12 @@ struct multi_send_data {
> >     u32 count; /* counter of batched packets */  };
> >
> > +struct netvsc_stats {
> > +   u64 packets;
> > +   u64 bytes;
> > +   struct u64_stats_sync s_sync;
> > +};
> > +
> >  /* The context of the netvsc device  */  struct net_device_context {
> >     /* point back to our device context */ @@ -618,6 +624,9 @@ struct
> > net_device_context {
> >     struct delayed_work dwork;
> >     struct work_struct work;
> >     u32 msg_enable; /* debug level */
> > +
> > +   struct netvsc_stats __percpu *tx_stats;
> > +   struct netvsc_stats __percpu *rx_stats;
> >  };
> >
> >  /* Per netvsc device */
> > diff --git a/drivers/net/hyperv/netvsc_drv.c
> > b/drivers/net/hyperv/netvsc_drv.c index 5993c7e..310b902 100644
> > --- a/drivers/net/hyperv/netvsc_drv.c
> > +++ b/drivers/net/hyperv/netvsc_drv.c
> > @@ -391,7 +391,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct
> net_device *net)
> >     u32 skb_length;
> >     u32 pkt_sz;
> >     struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
> > -
> > +   struct netvsc_stats *tx_stats =
> > +this_cpu_ptr(net_device_ctx->tx_stats);
> >
> >     /* We will atmost need two pages to describe the rndis
> >      * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
> @@
> > -580,8 +580,10 @@ do_send:
> >
> >  drop:
> >     if (ret == 0) {
> > -           net->stats.tx_bytes += skb_length;
> > -           net->stats.tx_packets++;
> > +           u64_stats_update_begin(&tx_stats->s_sync);
> > +           tx_stats->packets++;
> > +           tx_stats->bytes += skb_length;
> > +           u64_stats_update_end(&tx_stats->s_sync);
> >     } else {
> >             if (ret != -EAGAIN) {
> >                     dev_kfree_skb_any(skb);
> > @@ -644,13 +646,17 @@ int netvsc_recv_callback(struct hv_device
> *device_obj,
> >                             struct ndis_tcp_ip_checksum_info *csum_info)
> {
> >     struct net_device *net;
> > +   struct net_device_context *net_device_ctx;
> >     struct sk_buff *skb;
> > +   struct netvsc_stats *rx_stats;
> >
> >     net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev;
> >     if (!net || net->reg_state != NETREG_REGISTERED) {
> >             packet->status = NVSP_STAT_FAIL;
> >             return 0;
> >     }
> > +   net_device_ctx = netdev_priv(net);
> > +   rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
> >
> >     /* Allocate a skb - TODO direct I/O to pages? */
> >     skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen); @@
> > -686,8 +692,10 @@ int netvsc_recv_callback(struct hv_device *device_obj,
> >     skb_record_rx_queue(skb, packet->channel->
> >                         offermsg.offer.sub_channel_index);
> >
> > -   net->stats.rx_packets++;
> > -   net->stats.rx_bytes += packet->total_data_buflen;
> > +   u64_stats_update_begin(&rx_stats->s_sync);
> > +   rx_stats->packets++;
> > +   rx_stats->bytes += packet->total_data_buflen;
> > +   u64_stats_update_end(&rx_stats->s_sync);
> >
> >     /*
> >      * Pass the skb back up. Network stack will deallocate the skb when
> > it @@ -753,6 +761,46 @@ static int netvsc_change_mtu(struct net_device
> *ndev, int mtu)
> >     return 0;
> >  }
> >
> > +static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net,
> > +                                               struct rtnl_link_stats64 
> > *t) {
> > +   struct net_device_context *ndev_ctx = netdev_priv(net);
> > +   int cpu;
> > +
> > +   for_each_possible_cpu(cpu) {
> > +           struct netvsc_stats *tx_stats = per_cpu_ptr(ndev_ctx->tx_stats,
> > +                                                       cpu);
> > +           struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats,
> > +                                                       cpu);
> > +           u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
> > +           unsigned int start;
> > +
> > +           do {
> > +                   start = u64_stats_fetch_begin_irq(&tx_stats->s_sync);
> > +                   tx_packets = tx_stats->packets;
> > +                   tx_bytes = tx_stats->bytes;
> > +           } while (u64_stats_fetch_retry_irq(&tx_stats->s_sync, start));
> > +
> > +           do {
> > +                   start = u64_stats_fetch_begin_irq(&rx_stats->s_sync);
> > +                   rx_packets = rx_stats->packets;
> > +                   rx_bytes = rx_stats->bytes;
> > +           } while (u64_stats_fetch_retry_irq(&rx_stats->s_sync, start));
> > +
> > +           t->tx_bytes     += tx_bytes;
> > +           t->tx_packets   += tx_packets;
> > +           t->rx_bytes     += rx_bytes;
> > +           t->rx_packets   += rx_packets;
> > +   }
> > +
> > +   t->tx_dropped   = net->stats.tx_dropped;
> > +   t->tx_errors    = net->stats.tx_dropped;
> > +
> > +   t->rx_dropped   = net->stats.rx_dropped;
> > +   t->rx_errors    = net->stats.rx_errors;
> > +
> > +   return t;
> > +}
> >
> >  static int netvsc_set_mac_addr(struct net_device *ndev, void *p)  {
> > @@ -804,6 +852,7 @@ static const struct net_device_ops device_ops = {
> >     .ndo_validate_addr =            eth_validate_addr,
> >     .ndo_set_mac_address =          netvsc_set_mac_addr,
> >     .ndo_select_queue =             netvsc_select_queue,
> > +   .ndo_get_stats64 =              netvsc_get_stats64,
> >  #ifdef CONFIG_NET_POLL_CONTROLLER
> >     .ndo_poll_controller =          netvsc_poll_controller,
> >  #endif
> > @@ -855,6 +904,14 @@ static void netvsc_link_change(struct work_struct *w)
> >             netdev_notify_peers(net);
> >  }
> >
> > +static void netvsc_free_netdev(struct net_device *netdev) {
> > +   struct net_device_context *net_device_ctx = netdev_priv(netdev);
> > +
> > +   free_percpu(net_device_ctx->tx_stats);
> > +   free_percpu(net_device_ctx->rx_stats);
> > +   free_netdev(netdev);
> > +}
> >
> >  static int netvsc_probe(struct hv_device *dev,
> >                     const struct hv_vmbus_device_id *dev_id) @@ -883,6
> +940,13 @@
> > static int netvsc_probe(struct hv_device *dev,
> >             netdev_dbg(net, "netvsc msg_enable: %d\n",
> >                        net_device_ctx->msg_enable);
> >
> > +   net_device_ctx->tx_stats = netdev_alloc_pcpu_stats(struct
> netvsc_stats);
> > +   if (!net_device_ctx->tx_stats)
> > +           return -ENOMEM;
> > +   net_device_ctx->rx_stats = netdev_alloc_pcpu_stats(struct
> netvsc_stats);
> > +   if (!net_device_ctx->rx_stats)
> > +           return -ENOMEM;
> 
> Both of these return statements leak.
> 
> The first one leaks the net_device, the second one leaks the net_device and 
> the -
> >tx_stats memory.
> 
> You have to have cleanup paths here.

Thanks David. I will fix this and submit the new patch again.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to