1) Use the hwrm_stat_ctx_query command to query statistics.
Using the hwrm_stat_ctx_query command allows the driver to poll
statistics from the hardware on demand, instead of the current push
model in which the hardware DMAs the stats to the host at fixed
intervals. (A usage sketch follows after this list.)
2) Use the rx_mbuf_alloc_fail counter to track mbuf allocation failures.
3) We were wrongly incrementing hwrm_cmd_seq in the bnxt_hwrm_stat_clear,
bnxt_hwrm_stat_ctx_alloc and bnxt_hwrm_stat_ctx_free functions, even
though HWRM_PREP already assigns the sequence number.  This patch fixes
that.
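
For illustration only, here is a minimal sketch of how an application
would observe these counters through the ethdev API. It is not part of
the patch; the port id is an assumption, and the exact
rte_eth_stats_get() prototype may differ between DPDK releases.

    #include <inttypes.h>
    #include <stdio.h>
    #include <rte_ethdev.h>

    /* Illustrative only: rte_eth_stats_get() lands in bnxt_stats_get_op(),
     * which with this patch polls HWRM_STAT_CTX_QUERY per ring instead of
     * reading the periodically DMAed stats context, and reports mbuf
     * allocation failures via rx_nombuf (backed by the new
     * rx_mbuf_alloc_fail atomic counter). */
    static void dump_port_stats(uint8_t port_id)
    {
            struct rte_eth_stats stats;

            rte_eth_stats_get(port_id, &stats);
            printf("port %u: ipackets=%" PRIu64 " opackets=%" PRIu64
                   " rx_nombuf=%" PRIu64 "\n",
                   port_id, stats.ipackets, stats.opackets,
                   stats.rx_nombuf);
    }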

Signed-off-by: Stephen Hurd <stephen.h...@broadcom.com>
Signed-off-by: Ajit Khaparde <ajit.khapa...@broadcom.com>
---
 drivers/net/bnxt/bnxt.h                |   1 +
 drivers/net/bnxt/bnxt_ethdev.c         |   1 +
 drivers/net/bnxt/bnxt_hwrm.c           |  45 ++++++++++++--
 drivers/net/bnxt/bnxt_hwrm.h           |   2 +
 drivers/net/bnxt/bnxt_rxr.c            |  16 +++--
 drivers/net/bnxt/bnxt_stats.c          |  59 +++---------------
 drivers/net/bnxt/hsi_struct_def_dpdk.h | 108 +++++++++++++++++++++++++++++++++
 7 files changed, 170 insertions(+), 62 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index f25a1c4..387dab7 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -208,6 +208,7 @@ struct bnxt {
        uint16_t                vxlan_fw_dst_port_id;
        uint16_t                geneve_fw_dst_port_id;
        uint32_t                fw_ver;
+       rte_atomic64_t          rx_mbuf_alloc_fail;
 };
 
 /*
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index d6c36b9..80b85ab 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1595,6 +1595,7 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
 
        bp = eth_dev->data->dev_private;
 
+       rte_atomic64_init(&bp->rx_mbuf_alloc_fail);
        bp->dev_stopped = 1;
 
        if (bnxt_vf_pciid(pci_dev->id.device_id))
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index b6fff91..7808f28 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -832,13 +832,12 @@ int bnxt_hwrm_stat_clear(struct bnxt *bp, struct bnxt_cp_ring_info *cpr)
        struct hwrm_stat_ctx_clr_stats_input req = {.req_type = 0 };
        struct hwrm_stat_ctx_clr_stats_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, STAT_CTX_CLR_STATS, -1, resp);
-
        if (cpr->hw_stats_ctx_id == (uint32_t)HWRM_NA_SIGNATURE)
                return rc;
 
+       HWRM_PREP(req, STAT_CTX_CLR_STATS, -1, resp);
+
        req.stat_ctx_id = rte_cpu_to_le_16(cpr->hw_stats_ctx_id);
-       req.seq_id = rte_cpu_to_le_16(bp->hwrm_cmd_seq++);
 
        rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
 
@@ -856,9 +855,8 @@ int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 
        HWRM_PREP(req, STAT_CTX_ALLOC, -1, resp);
 
-       req.update_period_ms = rte_cpu_to_le_32(1000);
+       req.update_period_ms = rte_cpu_to_le_32(0);
 
-       req.seq_id = rte_cpu_to_le_16(bp->hwrm_cmd_seq++);
        req.stats_dma_addr =
            rte_cpu_to_le_64(cpr->hw_stats_map);
 
@@ -881,7 +879,6 @@ int bnxt_hwrm_stat_ctx_free(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
        HWRM_PREP(req, STAT_CTX_FREE, -1, resp);
 
        req.stat_ctx_id = rte_cpu_to_le_16(cpr->hw_stats_ctx_id);
-       req.seq_id = rte_cpu_to_le_16(bp->hwrm_cmd_seq++);
 
        rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
 
@@ -2481,6 +2478,42 @@ int bnxt_hwrm_exec_fwd_resp(struct bnxt *bp, uint16_t target_id,
        return rc;
 }
 
+int bnxt_hwrm_ctx_qstats(struct bnxt *bp, uint32_t cid, int idx,
+                        struct rte_eth_stats *stats)
+{
+       int rc = 0;
+       struct hwrm_stat_ctx_query_input req = {.req_type = 0};
+       struct hwrm_stat_ctx_query_output *resp = bp->hwrm_cmd_resp_addr;
+
+       HWRM_PREP(req, STAT_CTX_QUERY, -1, resp);
+
+       req.stat_ctx_id = rte_cpu_to_le_32(cid);
+
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+
+       HWRM_CHECK_RESULT;
+
+       stats->q_ipackets[idx] = rte_le_to_cpu_64(resp->rx_ucast_pkts);
+       stats->q_ipackets[idx] += rte_le_to_cpu_64(resp->rx_mcast_pkts);
+       stats->q_ipackets[idx] += rte_le_to_cpu_64(resp->rx_bcast_pkts);
+       stats->q_ibytes[idx] = rte_le_to_cpu_64(resp->rx_ucast_bytes);
+       stats->q_ibytes[idx] += rte_le_to_cpu_64(resp->rx_mcast_bytes);
+       stats->q_ibytes[idx] += rte_le_to_cpu_64(resp->rx_bcast_bytes);
+
+       stats->q_opackets[idx] = rte_le_to_cpu_64(resp->tx_ucast_pkts);
+       stats->q_opackets[idx] += rte_le_to_cpu_64(resp->tx_mcast_pkts);
+       stats->q_opackets[idx] += rte_le_to_cpu_64(resp->tx_bcast_pkts);
+       stats->q_obytes[idx] = rte_le_to_cpu_64(resp->tx_ucast_bytes);
+       stats->q_obytes[idx] += rte_le_to_cpu_64(resp->tx_mcast_bytes);
+       stats->q_obytes[idx] += rte_le_to_cpu_64(resp->tx_bcast_bytes);
+
+       stats->q_errors[idx] = rte_le_to_cpu_64(resp->rx_err_pkts);
+       stats->q_errors[idx] += rte_le_to_cpu_64(resp->tx_err_pkts);
+       stats->q_errors[idx] += rte_le_to_cpu_64(resp->rx_drop_pkts);
+
+       return rc;
+}
+
 static void bnxt_vnic_count(struct bnxt_vnic_info *vnic, void *cbdata)
 {
        uint32_t *count = cbdata;
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index 2d707a3..7fc7b57 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -87,6 +87,8 @@ int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp,
                             struct bnxt_cp_ring_info *cpr, unsigned int idx);
 int bnxt_hwrm_stat_ctx_free(struct bnxt *bp,
                            struct bnxt_cp_ring_info *cpr, unsigned int idx);
+int bnxt_hwrm_ctx_qstats(struct bnxt *bp, uint32_t cid, int idx,
+                        struct rte_eth_stats *stats);
 
 int bnxt_hwrm_ver_get(struct bnxt *bp);
 
diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
index de86161..4e4f071 100644
--- a/drivers/net/bnxt/bnxt_rxr.c
+++ b/drivers/net/bnxt/bnxt_rxr.c
@@ -68,8 +68,10 @@ static inline int bnxt_alloc_rx_data(struct bnxt_rx_queue *rxq,
        struct rte_mbuf *data;
 
        data = __bnxt_alloc_rx_data(rxq->mb_pool);
-       if (!data)
+       if (!data) {
+               rte_atomic64_inc(&rxq->bp->rx_mbuf_alloc_fail);
                return -ENOMEM;
+       }
 
        rx_buf->mbuf = data;
 
@@ -87,8 +89,10 @@ static inline int bnxt_alloc_ag_data(struct bnxt_rx_queue *rxq,
        struct rte_mbuf *data;
 
        data = __bnxt_alloc_rx_data(rxq->mb_pool);
-       if (!data)
+       if (!data) {
+               rte_atomic64_inc(&rxq->bp->rx_mbuf_alloc_fail);
                return -ENOMEM;
+       }
 
        if (rxbd == NULL)
                RTE_LOG(ERR, PMD, "Jumbo Frame. rxbd is NULL\n");
@@ -321,8 +325,10 @@ static inline struct rte_mbuf *bnxt_tpa_end(
 
        struct rte_mbuf *new_data = __bnxt_alloc_rx_data(rxq->mb_pool);
        RTE_ASSERT(new_data != NULL);
-       if (!new_data)
+       if (!new_data) {
+               rte_atomic64_inc(&rxq->bp->rx_mbuf_alloc_fail);
                return NULL;
+       }
        tpa_info->mbuf = new_data;
 
        return mbuf;
@@ -678,8 +684,10 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq)
                for (i = 0; i < BNXT_TPA_MAX; i++) {
                        rxr->tpa_info[i].mbuf =
                                __bnxt_alloc_rx_data(rxq->mb_pool);
-                       if (!rxr->tpa_info[i].mbuf)
+                       if (!rxr->tpa_info[i].mbuf) {
+                               rte_atomic64_inc(&rxq->bp->rx_mbuf_alloc_fail);
                                return -ENOMEM;
+                       }
                }
        }
 
diff --git a/drivers/net/bnxt/bnxt_stats.c b/drivers/net/bnxt/bnxt_stats.c
index 1db678b..fa2f467 100644
--- a/drivers/net/bnxt/bnxt_stats.c
+++ b/drivers/net/bnxt/bnxt_stats.c
@@ -196,64 +196,18 @@ void bnxt_stats_get_op(struct rte_eth_dev *eth_dev,
        for (i = 0; i < bp->rx_cp_nr_rings; i++) {
                struct bnxt_rx_queue *rxq = bp->rx_queues[i];
                struct bnxt_cp_ring_info *cpr = rxq->cp_ring;
-               struct ctx_hw_stats64 *hw_stats =
-                   (struct ctx_hw_stats64 *)cpr->hw_stats;
-
-               bnxt_stats->q_ipackets[i] +=
-                   rte_le_to_cpu_64(hw_stats->rx_ucast_pkts);
-               bnxt_stats->q_ipackets[i] +=
-                   rte_le_to_cpu_64(hw_stats->rx_mcast_pkts);
-               bnxt_stats->q_ipackets[i] +=
-                   rte_le_to_cpu_64(hw_stats->rx_bcast_pkts);
-
-               bnxt_stats->q_ibytes[i] +=
-                   rte_le_to_cpu_64(hw_stats->rx_ucast_bytes);
-               bnxt_stats->q_ibytes[i] +=
-                   rte_le_to_cpu_64(hw_stats->rx_mcast_bytes);
-               bnxt_stats->q_ibytes[i] +=
-                   rte_le_to_cpu_64(hw_stats->rx_bcast_bytes);
-
-               /*
-                * TBD: No clear mapping to this... we don't seem
-                * to have a stat specifically for dropped due to
-                * insufficient mbufs.
-                */
-               bnxt_stats->q_errors[i] = 0;
-
-               /* These get replaced once the *_QSTATS commands work */
-               bnxt_stats->ipackets += bnxt_stats->q_ipackets[i];
-               bnxt_stats->ibytes += bnxt_stats->q_ibytes[i];
-               bnxt_stats->imissed += bnxt_stats->q_errors[i];
-               bnxt_stats->ierrors +=
-                               rte_le_to_cpu_64(hw_stats->rx_discard_pkts);
+
+               bnxt_hwrm_ctx_qstats(bp, cpr->hw_stats_ctx_id, i, bnxt_stats);
        }
 
        for (i = 0; i < bp->tx_cp_nr_rings; i++) {
                struct bnxt_tx_queue *txq = bp->tx_queues[i];
                struct bnxt_cp_ring_info *cpr = txq->cp_ring;
-               struct ctx_hw_stats64 *hw_stats =
-                   (struct ctx_hw_stats64 *)cpr->hw_stats;
-
-               bnxt_stats->q_opackets[i] +=
-                   rte_le_to_cpu_64(hw_stats->tx_ucast_pkts);
-               bnxt_stats->q_opackets[i] +=
-                   rte_le_to_cpu_64(hw_stats->tx_mcast_pkts);
-               bnxt_stats->q_opackets[i] +=
-                   rte_le_to_cpu_64(hw_stats->tx_bcast_pkts);
-
-               bnxt_stats->q_obytes[i] +=
-                   rte_le_to_cpu_64(hw_stats->tx_ucast_bytes);
-               bnxt_stats->q_obytes[i] +=
-                   rte_le_to_cpu_64(hw_stats->tx_mcast_bytes);
-               bnxt_stats->q_obytes[i] +=
-                   rte_le_to_cpu_64(hw_stats->tx_bcast_bytes);
-
-               /* These get replaced once the *_QSTATS commands work */
-               bnxt_stats->opackets += bnxt_stats->q_opackets[i];
-               bnxt_stats->obytes +=  bnxt_stats->q_obytes[i];
-               bnxt_stats->oerrors += rte_le_to_cpu_64(hw_stats->tx_drop_pkts);
-               bnxt_stats->oerrors += rte_le_to_cpu_64(hw_stats->tx_discard_pkts);
+
+               bnxt_hwrm_ctx_qstats(bp, cpr->hw_stats_ctx_id, i, bnxt_stats);
        }
+       bnxt_hwrm_func_qstats(bp, 0xffff, bnxt_stats);
+       bnxt_stats->rx_nombuf = rte_atomic64_read(&bp->rx_mbuf_alloc_fail);
 }
 
 void bnxt_stats_reset_op(struct rte_eth_dev *eth_dev)
@@ -261,6 +215,7 @@ void bnxt_stats_reset_op(struct rte_eth_dev *eth_dev)
        struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
 
        bnxt_clear_all_hwrm_stat_ctxs(bp);
+       rte_atomic64_clear(&bp->rx_mbuf_alloc_fail);
 }
 
 int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev,
diff --git a/drivers/net/bnxt/hsi_struct_def_dpdk.h b/drivers/net/bnxt/hsi_struct_def_dpdk.h
index 84c53d7..28c69d0 100644
--- a/drivers/net/bnxt/hsi_struct_def_dpdk.h
+++ b/drivers/net/bnxt/hsi_struct_def_dpdk.h
@@ -163,6 +163,7 @@ struct ctx_hw_stats64 {
 #define HWRM_TUNNEL_DST_PORT_FREE      (UINT32_C(0xa2))
 #define HWRM_STAT_CTX_ALLOC            (UINT32_C(0xb0))
 #define HWRM_STAT_CTX_FREE             (UINT32_C(0xb1))
+#define HWRM_STAT_CTX_QUERY            (UINT32_C(0xb2))
 #define HWRM_STAT_CTX_CLR_STATS                (UINT32_C(0xb3))
 #define HWRM_EXEC_FWD_RESP             (UINT32_C(0xd0))
 #define HWRM_REJECT_FWD_RESP           (UINT32_C(0xd1))
@@ -3378,6 +3379,113 @@ struct hwrm_func_qstats_output {
         */
 } __attribute__((packed));
 
+/* hwrm_stat_ctx_query */
+/* Description: This command returns statistics of a context. */
+/* Input       (24 bytes) */
+struct hwrm_stat_ctx_query_input {
+       uint16_t req_type;
+       /*
+        * This value indicates what type of request this is. The format
+        * for the rest of the command is determined by this field.
+        */
+       uint16_t cmpl_ring;
+       /*
+        * This value indicates what completion ring the request
+        * will be optionally completed on. If the value is -1, then no
+        * CR completion will be generated. Any other value must be a
+        * valid CR ring_id value for this function.
+        */
+       uint16_t seq_id;
+       /* This value indicates the command sequence number. */
+       uint16_t target_id;
+       /*
+        * Target ID of this command. 0x0 - 0xFFF8 - Used for function
+        * ids 0xFFF8 - 0xFFFE - Reserved for internal processors 0xFFFF
+        * - HWRM
+        */
+       uint64_t resp_addr;
+       /*
+        * This is the host address where the response will be written
+        * when the request is complete. This area must be 16B aligned
+        * and must be cleared to zero before the request is made.
+        */
+       uint32_t stat_ctx_id;
+       /* ID of the statistics context that is being queried. */
+       uint32_t unused_0;
+} __attribute__((packed));
+
+/* Output      (176 bytes) */
+struct hwrm_stat_ctx_query_output {
+       uint16_t error_code;
+       /*
+        * Pass/Fail or error type Note: receiver to verify the in
+        * parameters, and fail the call with an error when appropriate
+        */
+       uint16_t req_type;
+       /* This field returns the type of original request. */
+       uint16_t seq_id;
+       /* This field provides original sequence number of the command. */
+       uint16_t resp_len;
+       /*
+        * This field is the length of the response in bytes. The last
+        * byte of the response is a valid flag that will read as '1'
+        * when the command has been completely written to memory.
+        */
+       uint64_t tx_ucast_pkts;
+       /* Number of transmitted unicast packets */
+       uint64_t tx_mcast_pkts;
+       /* Number of transmitted multicast packets */
+       uint64_t tx_bcast_pkts;
+       /* Number of transmitted broadcast packets */
+       uint64_t tx_err_pkts;
+       /* Number of transmitted packets with error */
+       uint64_t tx_drop_pkts;
+       /* Number of dropped packets on transmit path */
+       uint64_t tx_ucast_bytes;
+       /* Number of transmitted bytes for unicast traffic */
+       uint64_t tx_mcast_bytes;
+       /* Number of transmitted bytes for multicast traffic */
+       uint64_t tx_bcast_bytes;
+       /* Number of transmitted bytes for broadcast traffic */
+       uint64_t rx_ucast_pkts;
+       /* Number of received unicast packets */
+       uint64_t rx_mcast_pkts;
+       /* Number of received multicast packets */
+       uint64_t rx_bcast_pkts;
+       /* Number of received broadcast packets */
+       uint64_t rx_err_pkts;
+       /* Number of received packets with error */
+       uint64_t rx_drop_pkts;
+       /* Number of dropped packets on received path */
+       uint64_t rx_ucast_bytes;
+       /* Number of received bytes for unicast traffic */
+       uint64_t rx_mcast_bytes;
+       /* Number of received bytes for multicast traffic */
+       uint64_t rx_bcast_bytes;
+       /* Number of received bytes for broadcast traffic */
+       uint64_t rx_agg_pkts;
+       /* Number of aggregated unicast packets */
+       uint64_t rx_agg_bytes;
+       /* Number of aggregated unicast bytes */
+       uint64_t rx_agg_events;
+       /* Number of aggregation events */
+       uint64_t rx_agg_aborts;
+       /* Number of aborted aggregations */
+       uint32_t unused_0;
+       uint8_t unused_1;
+       uint8_t unused_2;
+       uint8_t unused_3;
+       uint8_t valid;
+       /*
+        * This field is used in Output records to indicate that the
+        * output is completely written to RAM. This field should be
+        * read as '1' to indicate that the output has been completely
+        * written. When writing a command completion or response to an
+        * internal processor, the order of writes has to be such that
+        * this field is written last.
+        */
+} __attribute__((packed));
+
 /* hwrm_func_clr_stats */
 /*
  * Description: This command clears statistics of a function. The input FID
-- 
2.10.1 (Apple Git-78)
