On 11/02/2017 11:52 AM, Miquel Raynal wrote:
> Add ethtool statistics support by reading the GOP statistics from the
> hardware counters. Also implement a workqueue to gather the statistics
> every second or some 32-bit counters could overflow.
> 
> Suggested-by: Stefan Chulski <stef...@marvell.com>
> Signed-off-by: Miquel Raynal <miquel.ray...@free-electrons.com>
> ---
>  drivers/net/ethernet/marvell/mvpp2.c | 226 
> ++++++++++++++++++++++++++++++++++-
>  1 file changed, 220 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/ethernet/marvell/mvpp2.c 
> b/drivers/net/ethernet/marvell/mvpp2.c
> index 97efe4733661..fb92a0927116 100644
> --- a/drivers/net/ethernet/marvell/mvpp2.c
> +++ b/drivers/net/ethernet/marvell/mvpp2.c
> @@ -769,6 +769,44 @@ enum mvpp2_bm_type {
>       MVPP2_BM_SWF_SHORT
>  };
>  
> +/* GMAC MIB Counters register definitions */
> +#define MVPP21_MIB_COUNTERS_OFFSET           0x1000
> +#define MVPP21_MIB_COUNTERS_PORT_SZ          0x400
> +#define MVPP22_MIB_COUNTERS_OFFSET           0x0
> +#define MVPP22_MIB_COUNTERS_PORT_SZ          0x100
> +
> +#define MVPP2_MIB_GOOD_OCTETS_RCVD_LOW               0x0
> +#define MVPP2_MIB_GOOD_OCTETS_RCVD_HIGH              0x4
> +#define MVPP2_MIB_BAD_OCTETS_RCVD            0x8
> +#define MVPP2_MIB_CRC_ERRORS_SENT            0xc
> +#define MVPP2_MIB_UNICAST_FRAMES_RCVD                0x10
> +#define MVPP2_MIB_BROADCAST_FRAMES_RCVD              0x18
> +#define MVPP2_MIB_MULTICAST_FRAMES_RCVD              0x1c
> +#define MVPP2_MIB_FRAMES_64_OCTETS           0x20
> +#define MVPP2_MIB_FRAMES_65_TO_127_OCTETS    0x24
> +#define MVPP2_MIB_FRAMES_128_TO_255_OCTETS   0x28
> +#define MVPP2_MIB_FRAMES_256_TO_511_OCTETS   0x2c
> +#define MVPP2_MIB_FRAMES_512_TO_1023_OCTETS  0x30
> +#define MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS  0x34
> +#define MVPP2_MIB_GOOD_OCTETS_SENT_LOW               0x38
> +#define MVPP2_MIB_GOOD_OCTETS_SENT_HIGH              0x3c
> +#define MVPP2_MIB_UNICAST_FRAMES_SENT                0x40
> +#define MVPP2_MIB_MULTICAST_FRAMES_SENT              0x48
> +#define MVPP2_MIB_BROADCAST_FRAMES_SENT              0x4c
> +#define MVPP2_MIB_FC_SENT                    0x54
> +#define MVPP2_MIB_FC_RCVD                    0x58
> +#define MVPP2_MIB_RX_FIFO_OVERRUN            0x5c
> +#define MVPP2_MIB_UNDERSIZE_RCVD             0x60
> +#define MVPP2_MIB_FRAGMENTS_RCVD             0x64
> +#define MVPP2_MIB_OVERSIZE_RCVD                      0x68
> +#define MVPP2_MIB_JABBER_RCVD                        0x6c
> +#define MVPP2_MIB_MAC_RCV_ERROR                      0x70
> +#define MVPP2_MIB_BAD_CRC_EVENT                      0x74
> +#define MVPP2_MIB_COLLISION                  0x78
> +#define MVPP2_MIB_LATE_COLLISION             0x7c
> +
> +#define MVPP2_MIB_COUNTERS_STATS_DELAY               (1 * HZ)
> +
>  /* Definitions */
>  
>  /* Shared Packet Processor resources */
> @@ -796,6 +834,7 @@ struct mvpp2 {
>       struct clk *axi_clk;
>  
>       /* List of pointers to port structures */
> +     int port_count;
>       struct mvpp2_port **port_list;
>  
>       /* Aggregated TXQs */
> @@ -817,6 +856,10 @@ struct mvpp2 {
>  
>       /* Maximum number of RXQs per port */
>       unsigned int max_port_rxqs;
> +
> +     /* Workqueue to gather hardware statistics */
> +     struct delayed_work stats_work;
> +     struct workqueue_struct *stats_queue;
>  };
>  
>  struct mvpp2_pcpu_stats {
> @@ -879,6 +922,7 @@ struct mvpp2_port {
>       u16 tx_ring_size;
>       u16 rx_ring_size;
>       struct mvpp2_pcpu_stats __percpu *stats;
> +     u64 *ethtool_stats;
>  
>       phy_interface_t phy_interface;
>       struct device_node *phy_node;
> @@ -4743,9 +4787,137 @@ static void mvpp2_port_loopback_set(struct mvpp2_port 
> *port)
>       writel(val, port->base + MVPP2_GMAC_CTRL_1_REG);
>  }
>  
> +static u64 mvpp2_read_count(struct mvpp2_port *port, unsigned int offset)
> +{
> +     bool reg_is_64b =
> +             (offset == MVPP2_MIB_GOOD_OCTETS_RCVD_LOW) ||
> +             (offset == MVPP2_MIB_GOOD_OCTETS_SENT_LOW);

This does not scale very well, put that in your statistics structure and
define a member "reg_is_64b" there such that you can pass a pointer to
one of these members here, and check, on per-counter basis whether this
is needed or not.

> +     void __iomem *base;
> +     u64 val;
> +
> +     if (port->priv->hw_version == MVPP21)
> +             base = port->priv->lms_base + MVPP21_MIB_COUNTERS_OFFSET +
> +                    port->gop_id * MVPP21_MIB_COUNTERS_PORT_SZ;
> +     else
> +             base = port->priv->iface_base + MVPP22_MIB_COUNTERS_OFFSET +
> +                    port->gop_id * MVPP22_MIB_COUNTERS_PORT_SZ;
> +
> +     val = readl(base + offset);
> +     if (reg_is_64b)
> +             val += (u64)readl(base + offset + 4) << 32;

So the value gets latched when the higher part gets read last?

> +
> +     return val;
> +}
> +
> +struct mvpp2_ethtool_statistics {
> +     unsigned int offset;
> +     const char string[ETH_GSTRING_LEN];

Add your reg_is_64b member here too.

> +};
> +
> +/* Due to the fact that software statistics and hardware statistics are, by
> + * design, incremented at different moments in the chain of packet 
> processing,
> + * it is very likely that incoming packets could have been dropped after 
> being
> + * counted by hardware but before reaching software statistics (most probably
> + * multicast packets), and in the oppposite way, during transmission, FCS 
> bytes
> + * are added in between as well as TSO skb will be split and header bytes 
> added.
> + */

OK, not sure what to make of that comment.

> +static struct mvpp2_ethtool_statistics mvpp2_ethtool_stats[] = {
> +     { MVPP2_MIB_GOOD_OCTETS_RCVD_LOW, "good_octets_received" },
> +     { MVPP2_MIB_BAD_OCTETS_RCVD, "bad_octets_received" },
> +     { MVPP2_MIB_CRC_ERRORS_SENT, "crc_errors_sent" },
> +     { MVPP2_MIB_UNICAST_FRAMES_RCVD, "unicast_frames_received" },
> +     { MVPP2_MIB_BROADCAST_FRAMES_RCVD, "broadcast_frames_received" },
> +     { MVPP2_MIB_MULTICAST_FRAMES_RCVD, "multicast_frames_received" },
> +     { MVPP2_MIB_FRAMES_64_OCTETS, "frames_64_octets" },
> +     { MVPP2_MIB_FRAMES_65_TO_127_OCTETS, "frames_65_to_127_octet" },
> +     { MVPP2_MIB_FRAMES_128_TO_255_OCTETS, "frames_128_to_255_octet" },
> +     { MVPP2_MIB_FRAMES_256_TO_511_OCTETS, "frames_256_to_511_octet" },
> +     { MVPP2_MIB_FRAMES_512_TO_1023_OCTETS, "frames_512_to_1023_octet" },
> +     { MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS, "frames_1024_to_max_octet" },
> +     { MVPP2_MIB_GOOD_OCTETS_SENT_LOW, "good_octets_sent" },
> +     { MVPP2_MIB_UNICAST_FRAMES_SENT, "unicast_frames_sent" },
> +     { MVPP2_MIB_MULTICAST_FRAMES_SENT, "multicast_frames_sent" },
> +     { MVPP2_MIB_BROADCAST_FRAMES_SENT, "broadcast_frames_sent" },
> +     { MVPP2_MIB_FC_SENT, "fc_sent" },
> +     { MVPP2_MIB_FC_RCVD, "fc_received" },
> +     { MVPP2_MIB_RX_FIFO_OVERRUN, "rx_fifo_overrun" },
> +     { MVPP2_MIB_UNDERSIZE_RCVD, "undersize_received" },
> +     { MVPP2_MIB_FRAGMENTS_RCVD, "fragments_received" },
> +     { MVPP2_MIB_OVERSIZE_RCVD, "oversize_received" },
> +     { MVPP2_MIB_JABBER_RCVD, "jabber_received" },
> +     { MVPP2_MIB_MAC_RCV_ERROR, "mac_receive_error" },
> +     { MVPP2_MIB_BAD_CRC_EVENT, "bad_crc_event" },
> +     { MVPP2_MIB_COLLISION, "collision" },
> +     { MVPP2_MIB_LATE_COLLISION, "late_collision" },
> +};
> +
> +static void mvpp2_ethtool_get_strings(struct net_device *netdev, u32 sset,
> +                                   u8 *data)
> +{
> +     if (sset == ETH_SS_STATS) {
> +             int i;
> +
> +             for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_stats); i++)
> +                     memcpy(data + i * ETH_GSTRING_LEN,
> +                            &mvpp2_ethtool_stats[i].string, ETH_GSTRING_LEN);
> +     }
> +}
> +
> +static void mvpp2_gather_hw_statistics(struct work_struct *work)
> +{
> +     struct delayed_work *del_work = to_delayed_work(work);
> +     struct mvpp2 *priv = container_of(del_work, struct mvpp2, stats_work);
> +     struct mvpp2_port *port;
> +     u64 *pstats;
> +     int i, j;
> +
> +     for (i = 0; i < priv->port_count; i++) {
> +             if (!priv->port_list[i])
> +                     continue;
> +
> +             port = priv->port_list[i];
> +             pstats = port->ethtool_stats;

port->ethtool_stats was allocated this way:

port->ethtool_stats = kzalloc(sizeof(mvpp2_ethtool_stats)) instead of
ARRAY_SIZE(mvpp2_ethtool_stats) * sizeof(u64)

This is probably working right now because mvpp2_ethtool_stats is much
bigger than ARRAY_SIZE(mvpp2_ethtool_stats) * sizeof(u64).


> +             for (j = 0; j < ARRAY_SIZE(mvpp2_ethtool_stats); j++)
> +                     *pstats++ += mvpp2_read_count(
> +                             port, mvpp2_ethtool_stats[j].offset);

You might want to look into the helper functions from
include/linux/u64_stats_sync.h to safely add a 32-bit quantity to a
64-bit quantity on 32-bit hosts.

> +     }
> +
> +     /* No need to read again the counters right after this function if it
> +      * was called asynchronously by the user (ie. use of ethtool).
> +      */
> +     cancel_delayed_work(&priv->stats_work);
> +     queue_delayed_work(priv->stats_queue, &priv->stats_work,
> +                        MVPP2_MIB_COUNTERS_STATS_DELAY);
> +}
> +
> +static void mvpp2_ethtool_get_stats(struct net_device *dev,
> +                                 struct ethtool_stats *stats, u64 *data)
> +{
> +     struct mvpp2_port *port = netdev_priv(dev);
> +
> +     /* Update statistics for all ports, copy only those actually needed */
> +     mvpp2_gather_hw_statistics(&port->priv->stats_work.work);
> +
> +     memcpy(data, port->ethtool_stats,
> +            sizeof(u64) * ARRAY_SIZE(mvpp2_ethtool_stats));
> +}
> +
> +static int mvpp2_ethtool_get_sset_count(struct net_device *dev, int sset)
> +{
> +     if (sset == ETH_SS_STATS)
> +             return ARRAY_SIZE(mvpp2_ethtool_stats);
> +
> +     return -EOPNOTSUPP;
> +}
> +
>  static void mvpp2_port_reset(struct mvpp2_port *port)
>  {
>       u32 val;
> +     int i;

unsigned int i

> +
> +     /* Read the GOP statistics to reset the hardware counters */
> +     for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_stats); i++)
> +             mvpp2_read_count(port, mvpp2_ethtool_stats[i].offset);
>  
>       val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) &
>                   ~MVPP2_GMAC_PORT_RESET_MASK;
> @@ -7199,6 +7371,9 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = {
>       .get_drvinfo    = mvpp2_ethtool_get_drvinfo,
>       .get_ringparam  = mvpp2_ethtool_get_ringparam,
>       .set_ringparam  = mvpp2_ethtool_set_ringparam,
> +     .get_strings    = mvpp2_ethtool_get_strings,
> +     .get_ethtool_stats = mvpp2_ethtool_get_stats,
> +     .get_sset_count = mvpp2_ethtool_get_sset_count,
>       .get_link_ksettings = phy_ethtool_get_link_ksettings,
>       .set_link_ksettings = phy_ethtool_set_link_ksettings,
>  };
> @@ -7613,13 +7788,19 @@ static int mvpp2_port_probe(struct platform_device 
> *pdev,
>               port->base = priv->iface_base + MVPP22_GMAC_BASE(port->gop_id);
>       }
>  
> -     /* Alloc per-cpu stats */
> +     /* Alloc per-cpu and ethtool stats */
>       port->stats = netdev_alloc_pcpu_stats(struct mvpp2_pcpu_stats);
>       if (!port->stats) {
>               err = -ENOMEM;
>               goto err_free_irq;
>       }
>  
> +     port->ethtool_stats = kzalloc(sizeof(mvpp2_ethtool_stats), GFP_KERNEL);
> +     if (!port->ethtool_stats) {
> +             err = -ENOMEM;
> +             goto err_free_stats;
> +     }

Should not the above be kcalloc(sizeof(u64),
ARRAY_SIZE(mvpp2_ethtool_stats), GFP_KERNEL)? That is, an array of
ARRAY_SIZE(mvpp2_ethtool_stats) elements, each sizeof(u64) bytes wide?

> +
>       mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from);
>  
>       port->tx_ring_size = MVPP2_MAX_TXD;
> @@ -7629,7 +7810,7 @@ static int mvpp2_port_probe(struct platform_device 
> *pdev,
>       err = mvpp2_port_init(port);
>       if (err < 0) {
>               dev_err(&pdev->dev, "failed to init port %d\n", id);
> -             goto err_free_stats;
> +             goto err_free_ethstats;
>       }
>  
>       mvpp2_port_periodic_xon_disable(port);
> @@ -7685,6 +7866,8 @@ static int mvpp2_port_probe(struct platform_device 
> *pdev,
>  err_free_txq_pcpu:
>       for (i = 0; i < port->ntxqs; i++)
>               free_percpu(port->txqs[i]->pcpu);
> +err_free_ethstats:
> +     kfree(port->ethtool_stats);
>  err_free_stats:
>       free_percpu(port->stats);
>  err_free_irq:
> @@ -7707,6 +7890,7 @@ static void mvpp2_port_remove(struct mvpp2_port *port)
>       of_node_put(port->phy_node);
>       free_percpu(port->pcpu);
>       free_percpu(port->stats);
> +     kfree(port->ethtool_stats);
>       for (i = 0; i < port->ntxqs; i++)
>               free_percpu(port->txqs[i]->pcpu);
>       mvpp2_queue_vectors_deinit(port);
> @@ -7893,7 +8077,7 @@ static int mvpp2_probe(struct platform_device *pdev)
>       struct mvpp2 *priv;
>       struct resource *res;
>       void __iomem *base;
> -     int port_count, i;
> +     int i;
>       int err;
>  
>       priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
> @@ -8008,14 +8192,14 @@ static int mvpp2_probe(struct platform_device *pdev)
>               goto err_mg_clk;
>       }
>  
> -     port_count = of_get_available_child_count(dn);
> -     if (port_count == 0) {
> +     priv->port_count = of_get_available_child_count(dn);
> +     if (priv->port_count == 0) {
>               dev_err(&pdev->dev, "no ports enabled\n");
>               err = -ENODEV;
>               goto err_mg_clk;
>       }
>  
> -     priv->port_list = devm_kcalloc(&pdev->dev, port_count,
> +     priv->port_list = devm_kcalloc(&pdev->dev, priv->port_count,
>                                      sizeof(*priv->port_list),
>                                      GFP_KERNEL);>    if (!priv->port_list) {
> @@ -8023,6 +8207,20 @@ static int mvpp2_probe(struct platform_device *pdev)
>               goto err_mg_clk;
>       }
>  
> +     /* Statistics must be gathered regularly because some of them (like
> +      * packets counters) are 32-bit registers and could overflow quite
> +      * quickly. For instance, a 10Gb link used at full bandwidth with the
> +      * smallest packets (64B) will overflow a 32-bit counter in less than
> +      * 30 seconds. Then, use a workqueue to fill 64-bit counters.
> +      */
> +     priv->stats_queue = create_singlethread_workqueue("mvpp2_hw_stats");
> +     if (!priv->stats_queue) {
> +             err = -ENOMEM;
> +             goto err_mg_clk;
> +     }

If I have multiple of these network devices in my system, it would be
nice to have an unique identifier after mvpp22_hw_stats to help
differentiate them (and possibly change their scheduling priorities),
how about using "mvpp2_hw_stats/%d"?

> +
> +     INIT_DELAYED_WORK(&priv->stats_work, mvpp2_gather_hw_statistics);
> +
>       /* Initialize ports */
>       i = 0;
>       for_each_available_child_of_node(dn, port_node) {
> @@ -8033,6 +8231,10 @@ static int mvpp2_probe(struct platform_device *pdev)
>       }
>  
>       platform_set_drvdata(pdev, priv);
> +
> +     queue_delayed_work(priv->stats_queue, &priv->stats_work,
> +                        MVPP2_MIB_COUNTERS_STATS_DELAY);

If the network interface is not used (ndo_open is not called) we have
this workqueue running for nothing because the statistics should not
even increase, and this is just creating unnecessary system activity for
nothing.

> +
>       return 0;
>  
>  err_mg_clk:
> @@ -8053,6 +8255,18 @@ static int mvpp2_remove(struct platform_device *pdev)
>       struct device_node *port_node;
>       int i = 0;
>  
> +     /* This work recall himself within a delay. If the cancellation returned
> +      * a non-zero value, it means a work is still running. In that case, use
> +      * use the flush (returns when the running work will be done) and cancel
> +      * the new work that was just submitted to the queue but not started yet
> +      * due to the delay.
> +      */
> +     if (!cancel_delayed_work(&priv->stats_work)) {
> +             flush_workqueue(priv->stats_queue);
> +             cancel_delayed_work(&priv->stats_work);
> +     }

Similarly, this needs to be moved to the ndo_stop() function.

> +     destroy_workqueue(priv->stats_queue);
> +
>       for_each_available_child_of_node(dn, port_node) {
>               if (priv->port_list[i])
>                       mvpp2_port_remove(priv->port_list[i]);
> 


-- 
Florian

Reply via email to