On 11/02/2017 11:52 AM, Miquel Raynal wrote: > Add ethtool statistics support by reading the GOP statistics from the > hardware counters. Also implement a workqueue to gather the statistics > every second or some 32-bit counters could overflow. > > Suggested-by: Stefan Chulski <stef...@marvell.com> > Signed-off-by: Miquel Raynal <miquel.ray...@free-electrons.com> > --- > drivers/net/ethernet/marvell/mvpp2.c | 226 > ++++++++++++++++++++++++++++++++++- > 1 file changed, 220 insertions(+), 6 deletions(-) > > diff --git a/drivers/net/ethernet/marvell/mvpp2.c > b/drivers/net/ethernet/marvell/mvpp2.c > index 97efe4733661..fb92a0927116 100644 > --- a/drivers/net/ethernet/marvell/mvpp2.c > +++ b/drivers/net/ethernet/marvell/mvpp2.c > @@ -769,6 +769,44 @@ enum mvpp2_bm_type { > MVPP2_BM_SWF_SHORT > }; > > +/* GMAC MIB Counters register definitions */ > +#define MVPP21_MIB_COUNTERS_OFFSET 0x1000 > +#define MVPP21_MIB_COUNTERS_PORT_SZ 0x400 > +#define MVPP22_MIB_COUNTERS_OFFSET 0x0 > +#define MVPP22_MIB_COUNTERS_PORT_SZ 0x100 > + > +#define MVPP2_MIB_GOOD_OCTETS_RCVD_LOW 0x0 > +#define MVPP2_MIB_GOOD_OCTETS_RCVD_HIGH 0x4 > +#define MVPP2_MIB_BAD_OCTETS_RCVD 0x8 > +#define MVPP2_MIB_CRC_ERRORS_SENT 0xc > +#define MVPP2_MIB_UNICAST_FRAMES_RCVD 0x10 > +#define MVPP2_MIB_BROADCAST_FRAMES_RCVD 0x18 > +#define MVPP2_MIB_MULTICAST_FRAMES_RCVD 0x1c > +#define MVPP2_MIB_FRAMES_64_OCTETS 0x20 > +#define MVPP2_MIB_FRAMES_65_TO_127_OCTETS 0x24 > +#define MVPP2_MIB_FRAMES_128_TO_255_OCTETS 0x28 > +#define MVPP2_MIB_FRAMES_256_TO_511_OCTETS 0x2c > +#define MVPP2_MIB_FRAMES_512_TO_1023_OCTETS 0x30 > +#define MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS 0x34 > +#define MVPP2_MIB_GOOD_OCTETS_SENT_LOW 0x38 > +#define MVPP2_MIB_GOOD_OCTETS_SENT_HIGH 0x3c > +#define MVPP2_MIB_UNICAST_FRAMES_SENT 0x40 > +#define MVPP2_MIB_MULTICAST_FRAMES_SENT 0x48 > +#define MVPP2_MIB_BROADCAST_FRAMES_SENT 0x4c > +#define MVPP2_MIB_FC_SENT 0x54 > +#define MVPP2_MIB_FC_RCVD 0x58 > +#define MVPP2_MIB_RX_FIFO_OVERRUN 0x5c > +#define MVPP2_MIB_UNDERSIZE_RCVD 0x60 > +#define MVPP2_MIB_FRAGMENTS_RCVD 0x64 > +#define MVPP2_MIB_OVERSIZE_RCVD 0x68 > +#define MVPP2_MIB_JABBER_RCVD 0x6c > +#define MVPP2_MIB_MAC_RCV_ERROR 0x70 > +#define MVPP2_MIB_BAD_CRC_EVENT 0x74 > +#define MVPP2_MIB_COLLISION 0x78 > +#define MVPP2_MIB_LATE_COLLISION 0x7c > + > +#define MVPP2_MIB_COUNTERS_STATS_DELAY (1 * HZ) > + > /* Definitions */ > > /* Shared Packet Processor resources */ > @@ -796,6 +834,7 @@ struct mvpp2 { > struct clk *axi_clk; > > /* List of pointers to port structures */ > + int port_count; > struct mvpp2_port **port_list; > > /* Aggregated TXQs */ > @@ -817,6 +856,10 @@ struct mvpp2 { > > /* Maximum number of RXQs per port */ > unsigned int max_port_rxqs; > + > + /* Workqueue to gather hardware statistics */ > + struct delayed_work stats_work; > + struct workqueue_struct *stats_queue; > }; > > struct mvpp2_pcpu_stats { > @@ -879,6 +922,7 @@ struct mvpp2_port { > u16 tx_ring_size; > u16 rx_ring_size; > struct mvpp2_pcpu_stats __percpu *stats; > + u64 *ethtool_stats; > > phy_interface_t phy_interface; > struct device_node *phy_node; > @@ -4743,9 +4787,137 @@ static void mvpp2_port_loopback_set(struct mvpp2_port > *port) > writel(val, port->base + MVPP2_GMAC_CTRL_1_REG); > } > > +static u64 mvpp2_read_count(struct mvpp2_port *port, unsigned int offset) > +{ > + bool reg_is_64b = > + (offset == MVPP2_MIB_GOOD_OCTETS_RCVD_LOW) || > + (offset == MVPP2_MIB_GOOD_OCTETS_SENT_LOW);
This does not scale very well, put that in your statistics structure and define a member "reg_is_64b" there such that you can pass a pointer to one of these members here, and check, on per-counter basis whether this is needed or not. > + void __iomem *base; > + u64 val; > + > + if (port->priv->hw_version == MVPP21) > + base = port->priv->lms_base + MVPP21_MIB_COUNTERS_OFFSET + > + port->gop_id * MVPP21_MIB_COUNTERS_PORT_SZ; > + else > + base = port->priv->iface_base + MVPP22_MIB_COUNTERS_OFFSET + > + port->gop_id * MVPP22_MIB_COUNTERS_PORT_SZ; > + > + val = readl(base + offset); > + if (reg_is_64b) > + val += (u64)readl(base + offset + 4) << 32; So the value gets latched when the higher part gets read last? > + > + return val; > +} > + > +struct mvpp2_ethtool_statistics { > + unsigned int offset; > + const char string[ETH_GSTRING_LEN]; Add your reg_is_64b member here too. > +}; > + > +/* Due to the fact that software statistics and hardware statistics are, by > + * design, incremented at different moments in the chain of packet > processing, > + * it is very likely that incoming packets could have been dropped after > being > + * counted by hardware but before reaching software statistics (most probably > + * multicast packets), and in the oppposite way, during transmission, FCS > bytes > + * are added in between as well as TSO skb will be split and header bytes > added. > + */ OK, not sure what to make of that comment. > +static struct mvpp2_ethtool_statistics mvpp2_ethtool_stats[] = { > + { MVPP2_MIB_GOOD_OCTETS_RCVD_LOW, "good_octets_received" }, > + { MVPP2_MIB_BAD_OCTETS_RCVD, "bad_octets_received" }, > + { MVPP2_MIB_CRC_ERRORS_SENT, "crc_errors_sent" }, > + { MVPP2_MIB_UNICAST_FRAMES_RCVD, "unicast_frames_received" }, > + { MVPP2_MIB_BROADCAST_FRAMES_RCVD, "broadcast_frames_received" }, > + { MVPP2_MIB_MULTICAST_FRAMES_RCVD, "multicast_frames_received" }, > + { MVPP2_MIB_FRAMES_64_OCTETS, "frames_64_octets" }, > + { MVPP2_MIB_FRAMES_65_TO_127_OCTETS, "frames_65_to_127_octet" }, > + { MVPP2_MIB_FRAMES_128_TO_255_OCTETS, "frames_128_to_255_octet" }, > + { MVPP2_MIB_FRAMES_256_TO_511_OCTETS, "frames_256_to_511_octet" }, > + { MVPP2_MIB_FRAMES_512_TO_1023_OCTETS, "frames_512_to_1023_octet" }, > + { MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS, "frames_1024_to_max_octet" }, > + { MVPP2_MIB_GOOD_OCTETS_SENT_LOW, "good_octets_sent" }, > + { MVPP2_MIB_UNICAST_FRAMES_SENT, "unicast_frames_sent" }, > + { MVPP2_MIB_MULTICAST_FRAMES_SENT, "multicast_frames_sent" }, > + { MVPP2_MIB_BROADCAST_FRAMES_SENT, "broadcast_frames_sent" }, > + { MVPP2_MIB_FC_SENT, "fc_sent" }, > + { MVPP2_MIB_FC_RCVD, "fc_received" }, > + { MVPP2_MIB_RX_FIFO_OVERRUN, "rx_fifo_overrun" }, > + { MVPP2_MIB_UNDERSIZE_RCVD, "undersize_received" }, > + { MVPP2_MIB_FRAGMENTS_RCVD, "fragments_received" }, > + { MVPP2_MIB_OVERSIZE_RCVD, "oversize_received" }, > + { MVPP2_MIB_JABBER_RCVD, "jabber_received" }, > + { MVPP2_MIB_MAC_RCV_ERROR, "mac_receive_error" }, > + { MVPP2_MIB_BAD_CRC_EVENT, "bad_crc_event" }, > + { MVPP2_MIB_COLLISION, "collision" }, > + { MVPP2_MIB_LATE_COLLISION, "late_collision" }, > +}; > + > +static void mvpp2_ethtool_get_strings(struct net_device *netdev, u32 sset, > + u8 *data) > +{ > + if (sset == ETH_SS_STATS) { > + int i; > + > + for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_stats); i++) > + memcpy(data + i * ETH_GSTRING_LEN, > + &mvpp2_ethtool_stats[i].string, ETH_GSTRING_LEN); > + } > +} > + > +static void mvpp2_gather_hw_statistics(struct work_struct *work) > +{ > + struct delayed_work *del_work = to_delayed_work(work); > + struct mvpp2 *priv = container_of(del_work, struct mvpp2, stats_work); > + struct mvpp2_port *port; > + u64 *pstats; > + int i, j; > + > + for (i = 0; i < priv->port_count; i++) { > + if (!priv->port_list[i]) > + continue; > + > + port = priv->port_list[i]; > + pstats = port->ethtool_stats; port->ethtool_stats was allocated this way: port->ethtool_stats = kzalloc(sizeof(mvpp2_ethtool_stats)) instead of ARRAY_SIZE(mvpp2_ethtool_stats) * sizeof(u64) This is probably working right now because mvpp2_ethtool_stats is much bigger than ARRAY_SIZE(mvpp2_ethtool_stats) * sizeof(u64). > + for (j = 0; j < ARRAY_SIZE(mvpp2_ethtool_stats); j++) > + *pstats++ += mvpp2_read_count( > + port, mvpp2_ethtool_stats[j].offset); You might want to look into the helper functions from include/linux/u64_stats_sync.h to safely add a 32-bit quantity to a 64-bit quantity on 32-bit hosts. > + } > + > + /* No need to read again the counters right after this function if it > + * was called asynchronously by the user (ie. use of ethtool). > + */ > + cancel_delayed_work(&priv->stats_work); > + queue_delayed_work(priv->stats_queue, &priv->stats_work, > + MVPP2_MIB_COUNTERS_STATS_DELAY); > +} > + > +static void mvpp2_ethtool_get_stats(struct net_device *dev, > + struct ethtool_stats *stats, u64 *data) > +{ > + struct mvpp2_port *port = netdev_priv(dev); > + > + /* Update statistics for all ports, copy only those actually needed */ > + mvpp2_gather_hw_statistics(&port->priv->stats_work.work); > + > + memcpy(data, port->ethtool_stats, > + sizeof(u64) * ARRAY_SIZE(mvpp2_ethtool_stats)); > +} > + > +static int mvpp2_ethtool_get_sset_count(struct net_device *dev, int sset) > +{ > + if (sset == ETH_SS_STATS) > + return ARRAY_SIZE(mvpp2_ethtool_stats); > + > + return -EOPNOTSUPP; > +} > + > static void mvpp2_port_reset(struct mvpp2_port *port) > { > u32 val; > + int i; unsigned int i > + > + /* Read the GOP statistics to reset the hardware counters */ > + for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_stats); i++) > + mvpp2_read_count(port, mvpp2_ethtool_stats[i].offset); > > val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) & > ~MVPP2_GMAC_PORT_RESET_MASK; > @@ -7199,6 +7371,9 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = { > .get_drvinfo = mvpp2_ethtool_get_drvinfo, > .get_ringparam = mvpp2_ethtool_get_ringparam, > .set_ringparam = mvpp2_ethtool_set_ringparam, > + .get_strings = mvpp2_ethtool_get_strings, > + .get_ethtool_stats = mvpp2_ethtool_get_stats, > + .get_sset_count = mvpp2_ethtool_get_sset_count, > .get_link_ksettings = phy_ethtool_get_link_ksettings, > .set_link_ksettings = phy_ethtool_set_link_ksettings, > }; > @@ -7613,13 +7788,19 @@ static int mvpp2_port_probe(struct platform_device > *pdev, > port->base = priv->iface_base + MVPP22_GMAC_BASE(port->gop_id); > } > > - /* Alloc per-cpu stats */ > + /* Alloc per-cpu and ethtool stats */ > port->stats = netdev_alloc_pcpu_stats(struct mvpp2_pcpu_stats); > if (!port->stats) { > err = -ENOMEM; > goto err_free_irq; > } > > + port->ethtool_stats = kzalloc(sizeof(mvpp2_ethtool_stats), GFP_KERNEL); > + if (!port->ethtool_stats) { > + err = -ENOMEM; > + goto err_free_stats; > + } Should not the above be kcalloc(sizeof(u64), ARRAY_SIZE(mvpp2_ethtool_stats), GFP_KERNEL)? That is, an array of ARRAY_SIZE(mvpp2_ethtool_stats) elements, each sizeof(u64) bytes wide? > + > mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from); > > port->tx_ring_size = MVPP2_MAX_TXD; > @@ -7629,7 +7810,7 @@ static int mvpp2_port_probe(struct platform_device > *pdev, > err = mvpp2_port_init(port); > if (err < 0) { > dev_err(&pdev->dev, "failed to init port %d\n", id); > - goto err_free_stats; > + goto err_free_ethstats; > } > > mvpp2_port_periodic_xon_disable(port); > @@ -7685,6 +7866,8 @@ static int mvpp2_port_probe(struct platform_device > *pdev, > err_free_txq_pcpu: > for (i = 0; i < port->ntxqs; i++) > free_percpu(port->txqs[i]->pcpu); > +err_free_ethstats: > + kfree(port->ethtool_stats); > err_free_stats: > free_percpu(port->stats); > err_free_irq: > @@ -7707,6 +7890,7 @@ static void mvpp2_port_remove(struct mvpp2_port *port) > of_node_put(port->phy_node); > free_percpu(port->pcpu); > free_percpu(port->stats); > + kfree(port->ethtool_stats); > for (i = 0; i < port->ntxqs; i++) > free_percpu(port->txqs[i]->pcpu); > mvpp2_queue_vectors_deinit(port); > @@ -7893,7 +8077,7 @@ static int mvpp2_probe(struct platform_device *pdev) > struct mvpp2 *priv; > struct resource *res; > void __iomem *base; > - int port_count, i; > + int i; > int err; > > priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); > @@ -8008,14 +8192,14 @@ static int mvpp2_probe(struct platform_device *pdev) > goto err_mg_clk; > } > > - port_count = of_get_available_child_count(dn); > - if (port_count == 0) { > + priv->port_count = of_get_available_child_count(dn); > + if (priv->port_count == 0) { > dev_err(&pdev->dev, "no ports enabled\n"); > err = -ENODEV; > goto err_mg_clk; > } > > - priv->port_list = devm_kcalloc(&pdev->dev, port_count, > + priv->port_list = devm_kcalloc(&pdev->dev, priv->port_count, > sizeof(*priv->port_list), > GFP_KERNEL);> if (!priv->port_list) { > @@ -8023,6 +8207,20 @@ static int mvpp2_probe(struct platform_device *pdev) > goto err_mg_clk; > } > > + /* Statistics must be gathered regularly because some of them (like > + * packets counters) are 32-bit registers and could overflow quite > + * quickly. For instance, a 10Gb link used at full bandwidth with the > + * smallest packets (64B) will overflow a 32-bit counter in less than > + * 30 seconds. Then, use a workqueue to fill 64-bit counters. > + */ > + priv->stats_queue = create_singlethread_workqueue("mvpp2_hw_stats"); > + if (!priv->stats_queue) { > + err = -ENOMEM; > + goto err_mg_clk; > + } If I have multiple of these network devices in my system, it would be nice to have an unique identifier after mvpp22_hw_stats to help differentiate them (and possibly change their scheduling priorities), how about using "mvpp2_hw_stats/%d"? > + > + INIT_DELAYED_WORK(&priv->stats_work, mvpp2_gather_hw_statistics); > + > /* Initialize ports */ > i = 0; > for_each_available_child_of_node(dn, port_node) { > @@ -8033,6 +8231,10 @@ static int mvpp2_probe(struct platform_device *pdev) > } > > platform_set_drvdata(pdev, priv); > + > + queue_delayed_work(priv->stats_queue, &priv->stats_work, > + MVPP2_MIB_COUNTERS_STATS_DELAY); If the network interface is not used (ndo_open is not called) we have this workqueue running for nothing because the statistics should not even increase, and this is just creating unnecessary system activity for nothing. > + > return 0; > > err_mg_clk: > @@ -8053,6 +8255,18 @@ static int mvpp2_remove(struct platform_device *pdev) > struct device_node *port_node; > int i = 0; > > + /* This work recall himself within a delay. If the cancellation returned > + * a non-zero value, it means a work is still running. In that case, use > + * use the flush (returns when the running work will be done) and cancel > + * the new work that was just submitted to the queue but not started yet > + * due to the delay. > + */ > + if (!cancel_delayed_work(&priv->stats_work)) { > + flush_workqueue(priv->stats_queue); > + cancel_delayed_work(&priv->stats_work); > + } Similarly, this needs to be moved to the ndo_stop() function. > + destroy_workqueue(priv->stats_queue); > + > for_each_available_child_of_node(dn, port_node) { > if (priv->port_list[i]) > mvpp2_port_remove(priv->port_list[i]); > -- Florian