This issue was discovered in the software vm2vm forwarding case. When packets are received from virtio device 0 and routed (via tx_route) to virtio device 1, the tx statistics of device 0 are not updated.
To fix this problem, we check each rx/tx branch to update stats. Besides, the stats are printed at a separated thread, so we design a mechanism to make sure unreasonable data will not show. Signed-off-by: Jianfeng Tan <jianfeng.tan at intel.com> Tested-by: Qian Xu <qian.q.xu at intel.com> --- examples/vhost/main.c | 71 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/examples/vhost/main.c b/examples/vhost/main.c index 9bfda6d..1863dc3 100644 --- a/examples/vhost/main.c +++ b/examples/vhost/main.c @@ -312,12 +312,25 @@ struct ipv4_hdr { /* Per-device statistics struct */ struct device_statistics { - uint64_t tx_total; + /* rx: from vhost to virtio; tx: from virtio to vhost */ + + /* write-only by datapath threads */ + /* for non zero-copy case, pkts may be enqueued by any lcore */ rte_atomic64_t rx_total_atomic; - uint64_t rx_total; - uint64_t tx; rte_atomic64_t rx_atomic; + /* for zero-copy case */ + uint64_t rx_total; uint64_t rx; + /* write-only by corresponding datapath thread */ + uint64_t tx_total; + uint64_t tx; + + /* write-only by print-stats threads */ + uint64_t rx_total_p; + uint64_t rx_p; + uint64_t tx_total_p; + uint64_t tx_p; + } __rte_cache_aligned; struct device_statistics dev_statistics[MAX_DEVICES]; @@ -2787,7 +2800,6 @@ static void print_stats(void) { struct virtio_net_data_ll *dev_ll; - uint64_t tx_dropped, rx_dropped; uint64_t tx, tx_total, rx, rx_total; uint32_t device_fh; const char clr[] = { 27, '[', '2', 'J', '\0' }; @@ -2804,9 +2816,18 @@ print_stats(void) dev_ll = ll_root_used; while (dev_ll != NULL) { device_fh = (uint32_t)dev_ll->vdev->dev->device_fh; - tx_total = dev_statistics[device_fh].tx_total; - tx = dev_statistics[device_fh].tx; - tx_dropped = tx_total - tx; + + tx_total = dev_statistics[device_fh].tx_total - + dev_statistics[device_fh].tx_total_p; + tx = dev_statistics[device_fh].tx - + dev_statistics[device_fh].tx_p; + /* Because we do not use a lock to control 
the access of tx_total + * and tx in dev_statistics, tx may be greater than tx_total. If + * this happens, we'll count those redundant tx next time. + */ + if (unlikely(tx_total < tx)) + tx = tx_total; + if (zero_copy == 0) { rx_total = rte_atomic64_read( &dev_statistics[device_fh].rx_total_atomic); @@ -2816,22 +2837,32 @@ print_stats(void) rx_total = dev_statistics[device_fh].rx_total; rx = dev_statistics[device_fh].rx; } - rx_dropped = rx_total - rx; + rx_total -= dev_statistics[device_fh].rx_total_p; + rx -= dev_statistics[device_fh].rx_p; + if (unlikely(rx_total < rx)) + rx = rx_total; + + dev_statistics[device_fh].rx_total_p += rx_total; + dev_statistics[device_fh].rx_p += rx; + dev_statistics[device_fh].tx_total_p += tx_total; + dev_statistics[device_fh].tx_p += tx; printf("\nStatistics for device %"PRIu32" ------------------------------" - "\nTX total: %"PRIu64"" - "\nTX dropped: %"PRIu64"" - "\nTX successful: %"PRIu64"" - "\nRX total: %"PRIu64"" - "\nRX dropped: %"PRIu64"" - "\nRX successful: %"PRIu64"", + "\n\tTX total:\t\t\t%"PRIu64"" + "\n\tTX dropped:\t\t\t%"PRIu64"" + "\n\tTX successful:\t\t\t%"PRIu64"" + "\n\tRX total:\t\t\t%"PRIu64"" + "\n\tRX dropped:\t\t\t%"PRIu64"" + "\n\tRX successful:\t\t\t%"PRIu64"", device_fh, - tx_total, - tx_dropped, - tx, - rx_total, - rx_dropped, - rx); + dev_statistics[device_fh].tx_total_p, + (dev_statistics[device_fh].tx_total_p + - dev_statistics[device_fh].tx_p), + dev_statistics[device_fh].tx_p, + dev_statistics[device_fh].rx_total_p, + (dev_statistics[device_fh].rx_total_p + - dev_statistics[device_fh].rx_p), + dev_statistics[device_fh].rx_p); dev_ll = dev_ll->next; } -- 2.1.4