On 6/25/19 5:08 PM, Jakub Kicinski wrote:
On Thu, 20 Jun 2019 13:24:20 -0700, Shannon Nelson wrote:
Add both the Tx and Rx queue setup and handling.  The related
stats display come later.  Instead of using the generic napi
routines used by the slow-path command, the Tx and Rx paths
are simplified and inlined in one file in order to get better
compiler optimizations.

Signed-off-by: Shannon Nelson <snel...@pensando.io>
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c 
b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
index 5ebfaa320edf..6dfcada9e822 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_debugfs.c
@@ -351,6 +351,54 @@ int ionic_debugfs_add_qcq(struct lif *lif, struct qcq *qcq)
                                    desc_blob);
        }
+ if (qcq->flags & QCQ_F_TX_STATS) {
+               stats_dentry = debugfs_create_dir("tx_stats", q_dentry);
+               if (IS_ERR_OR_NULL(stats_dentry))
+                       return PTR_ERR(stats_dentry);
+
+               debugfs_create_u64("dma_map_err", 0400, stats_dentry,
+                                  &qcq->stats->tx.dma_map_err);
+               debugfs_create_u64("pkts", 0400, stats_dentry,
+                                  &qcq->stats->tx.pkts);
+               debugfs_create_u64("bytes", 0400, stats_dentry,
+                                  &qcq->stats->tx.bytes);
+               debugfs_create_u64("clean", 0400, stats_dentry,
+                                  &qcq->stats->tx.clean);
+               debugfs_create_u64("linearize", 0400, stats_dentry,
+                                  &qcq->stats->tx.linearize);
+               debugfs_create_u64("no_csum", 0400, stats_dentry,
+                                  &qcq->stats->tx.no_csum);
+               debugfs_create_u64("csum", 0400, stats_dentry,
+                                  &qcq->stats->tx.csum);
+               debugfs_create_u64("crc32_csum", 0400, stats_dentry,
+                                  &qcq->stats->tx.crc32_csum);
+               debugfs_create_u64("tso", 0400, stats_dentry,
+                                  &qcq->stats->tx.tso);
+               debugfs_create_u64("frags", 0400, stats_dentry,
+                                  &qcq->stats->tx.frags);
I wonder why debugfs over ethtool -S?

I believe this was from early engineering, before ethtool -S had been filled out.  I'll clean that up.


+static int ionic_tx(struct queue *q, struct sk_buff *skb)
+{
+       struct tx_stats *stats = q_to_tx_stats(q);
+       int err;
+
+       if (skb->ip_summed == CHECKSUM_PARTIAL)
+               err = ionic_tx_calc_csum(q, skb);
+       else
+               err = ionic_tx_calc_no_csum(q, skb);
+       if (err)
+               return err;
+
+       err = ionic_tx_skb_frags(q, skb);
+       if (err)
+               return err;
+
+       skb_tx_timestamp(skb);
+       stats->pkts++;
+       stats->bytes += skb->len;
nit: I think counting stats on completion may be a better idea,
      otherwise when you clean a full ring on stop your HW counters are
      guaranteed to be different than SW counters.  Am I wrong?

You are not wrong, that is how many drivers handle it.  I like seeing how much the driver was given (ethtool -S) versus how much the HW actually pushed out (netstat -i or ip -s link show).  These numbers shouldn't very often be very different, but it is interesting when they are.


+       ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb);
+
+       return 0;
+}
+
+static int ionic_tx_descs_needed(struct queue *q, struct sk_buff *skb)
+{
+       struct tx_stats *stats = q_to_tx_stats(q);
+       int err;
+
+       /* If TSO, need roundup(skb->len/mss) descs */
+       if (skb_is_gso(skb))
+               return (skb->len / skb_shinfo(skb)->gso_size) + 1;
This doesn't look correct, are you sure you don't want
skb_shinfo(skb)->gso_segs ?

That would probably work as well.


+
+       /* If non-TSO, just need 1 desc and nr_frags sg elems */
+       if (skb_shinfo(skb)->nr_frags <= IONIC_TX_MAX_SG_ELEMS)
+               return 1;
+
+       /* Too many frags, so linearize */
+       err = skb_linearize(skb);
+       if (err)
+               return err;
+
+       stats->linearize++;
+
+       /* Need 1 desc and zero sg elems */
+       return 1;
+}
+
+netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+       u16 queue_index = skb_get_queue_mapping(skb);
+       struct lif *lif = netdev_priv(netdev);
+       struct queue *q;
+       int ndescs;
+       int err;
+
+       if (unlikely(!test_bit(LIF_UP, lif->state))) {
+               dev_kfree_skb(skb);
+               return NETDEV_TX_OK;
+       }
Surely you stop TX before taking the queues down?

Yes, in ionic_lif_stop()



+       if (likely(lif_to_txqcq(lif, queue_index)))
+               q = lif_to_txq(lif, queue_index);
+       else
+               q = lif_to_txq(lif, 0);
+
+       ndescs = ionic_tx_descs_needed(q, skb);
+       if (ndescs < 0)
+               goto err_out_drop;
+
+       if (!ionic_q_has_space(q, ndescs)) {
+               netif_stop_subqueue(netdev, queue_index);
+               q->stop++;
+
+               /* Might race with ionic_tx_clean, check again */
+               smp_rmb();
+               if (ionic_q_has_space(q, ndescs)) {
+                       netif_wake_subqueue(netdev, queue_index);
+                       q->wake++;
+               } else {
+                       return NETDEV_TX_BUSY;
+               }
+       }
+
+       if (skb_is_gso(skb))
+               err = ionic_tx_tso(q, skb);
+       else
+               err = ionic_tx(q, skb);
+
+       if (err)
+               goto err_out_drop;
+
+       return NETDEV_TX_OK;
+
+err_out_drop:
+       q->drop++;
+       dev_kfree_skb(skb);
+       return NETDEV_TX_OK;
+}

Reply via email to