On 17.10.2020 23:35, Vladimir Oltean wrote: > DSA needs to push a header onto every packet on TX, and this might cause > reallocation under certain scenarios, which might affect, for example, > performance. > > But reallocated packets are not standardized in struct pcpu_sw_netstats, > struct net_device_stats or anywhere else, it seems, so we need to roll > our own extra netdevice statistics and expose them to ethtool. > > Signed-off-by: Vladimir Oltean <vladimir.olt...@nxp.com> > --- > net/dsa/dsa_priv.h | 9 +++++++++ > net/dsa/slave.c | 25 ++++++++++++++++++++++--- > 2 files changed, 31 insertions(+), 3 deletions(-) > > diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h > index 12998bf04e55..d39db7500cdd 100644 > --- a/net/dsa/dsa_priv.h > +++ b/net/dsa/dsa_priv.h > @@ -73,12 +73,21 @@ struct dsa_notifier_mtu_info { > int mtu; > }; > > +/* Driver statistics, other than those in struct rtnl_link_stats64. > + * These are collected per-CPU and aggregated by ethtool. > + */ > +struct dsa_slave_stats { > + __u64 tx_reallocs; > + struct u64_stats_sync syncp; > +} __aligned(1 * sizeof(u64)); > +
Wouldn't a simple unsigned long (like in struct net_device_stats) be sufficient here? This would make handling the counter much simpler. And as far as I understand, we are talking about a packet counter that is only touched in certain scenarios. > struct dsa_slave_priv { > /* Copy of CPU port xmit for faster access in slave transmit hot path */ > struct sk_buff * (*xmit)(struct sk_buff *skb, > struct net_device *dev); > > struct pcpu_sw_netstats __percpu *stats64; > + struct dsa_slave_stats __percpu *extra_stats; > > struct gro_cells gcells; > > diff --git a/net/dsa/slave.c b/net/dsa/slave.c > index 3bc5ca40c9fb..d4326940233c 100644 > --- a/net/dsa/slave.c > +++ b/net/dsa/slave.c > @@ -668,9 +668,10 @@ static void dsa_slave_get_strings(struct net_device *dev, > strncpy(data + len, "tx_bytes", len); > strncpy(data + 2 * len, "rx_packets", len); > strncpy(data + 3 * len, "rx_bytes", len); > + strncpy(data + 4 * len, "tx_reallocs", len); > if (ds->ops->get_strings) > ds->ops->get_strings(ds, dp->index, stringset, > - data + 4 * len); > + data + 5 * len); > } > } > > @@ -682,11 +683,13 @@ static void dsa_slave_get_ethtool_stats(struct > net_device *dev, > struct dsa_slave_priv *p = netdev_priv(dev); > struct dsa_switch *ds = dp->ds; > struct pcpu_sw_netstats *s; > + struct dsa_slave_stats *e; > unsigned int start; > int i; > > for_each_possible_cpu(i) { > u64 tx_packets, tx_bytes, rx_packets, rx_bytes; > + u64 tx_reallocs; > > s = per_cpu_ptr(p->stats64, i); > do { > @@ -696,13 +699,21 @@ static void dsa_slave_get_ethtool_stats(struct > net_device *dev, > rx_packets = s->rx_packets; > rx_bytes = s->rx_bytes; > } while (u64_stats_fetch_retry_irq(&s->syncp, start)); > + > + e = per_cpu_ptr(p->extra_stats, i); > + do { > + start = u64_stats_fetch_begin_irq(&e->syncp); > + tx_reallocs = e->tx_reallocs; > + } while (u64_stats_fetch_retry_irq(&e->syncp, start)); > + > data[0] += tx_packets; > data[1] += tx_bytes; > data[2] += rx_packets; > data[3] += rx_bytes; > + data[4] += 
tx_reallocs; > } > if (ds->ops->get_ethtool_stats) > - ds->ops->get_ethtool_stats(ds, dp->index, data + 4); > + ds->ops->get_ethtool_stats(ds, dp->index, data + 5); > } > > static int dsa_slave_get_sset_count(struct net_device *dev, int sset) > @@ -713,7 +724,7 @@ static int dsa_slave_get_sset_count(struct net_device > *dev, int sset) > if (sset == ETH_SS_STATS) { > int count; > > - count = 4; > + count = 5; > if (ds->ops->get_sset_count) > count += ds->ops->get_sset_count(ds, dp->index, sset); > > @@ -1806,6 +1817,12 @@ int dsa_slave_create(struct dsa_port *port) > free_netdev(slave_dev); > return -ENOMEM; > } > + p->extra_stats = netdev_alloc_pcpu_stats(struct dsa_slave_stats); > + if (!p->extra_stats) { > + free_percpu(p->stats64); > + free_netdev(slave_dev); > + return -ENOMEM; > + } > > ret = gro_cells_init(&p->gcells, slave_dev); > if (ret) > @@ -1864,6 +1881,7 @@ int dsa_slave_create(struct dsa_port *port) > out_gcells: > gro_cells_destroy(&p->gcells); > out_free: > + free_percpu(p->extra_stats); > free_percpu(p->stats64); > free_netdev(slave_dev); > port->slave = NULL; > @@ -1886,6 +1904,7 @@ void dsa_slave_destroy(struct net_device *slave_dev) > dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER); > phylink_destroy(dp->pl); > gro_cells_destroy(&p->gcells); > + free_percpu(p->extra_stats); > free_percpu(p->stats64); > free_netdev(slave_dev); > } >