Reuse the --record-core-cycles option to account for busy cycles. One
turn of packet_fwd_t is considered "busy" if there was at least one
received or transmitted packet.

Add two new interval_cycles and busy_cycles fields to the fwd_stream
structure. The core_cycles field cannot be reset to zero without
breaking the current behaviour.

Update get_end_cycles() to accept an additional argument for the number
of processed packets.

Every 10s, copy the number of cycles (busy and total) into a moving
average buffer. The buffer holds 6 samples of 10s and is rotated when
full.

When --record-core-cycles is specified, register a callback with
rte_lcore_register_busy_percent_cb(). In the callback, access the
average buffer to compute the percentage of busy cycles.

Example:

  testpmd> show lcores
  lcore 3, socket 0, role RTE, cpuset 3, busy N/A
  lcore 4, socket 0, role RTE, cpuset 4, busy 39% last 60 sec

Signed-off-by: Robin Jarry <rja...@redhat.com>
---
 app/test-pmd/5tswap.c         |  5 ++-
 app/test-pmd/csumonly.c       |  6 +--
 app/test-pmd/flowgen.c        |  2 +-
 app/test-pmd/icmpecho.c       |  6 +--
 app/test-pmd/iofwd.c          |  5 ++-
 app/test-pmd/macfwd.c         |  5 ++-
 app/test-pmd/macswap.c        |  5 ++-
 app/test-pmd/noisy_vnf.c      |  4 ++
 app/test-pmd/rxonly.c         |  5 ++-
 app/test-pmd/shared_rxq_fwd.c |  5 ++-
 app/test-pmd/testpmd.c        | 69 +++++++++++++++++++++++++++++++++--
 app/test-pmd/testpmd.h        | 25 +++++++++++--
 app/test-pmd/txonly.c         |  7 ++--
 13 files changed, 120 insertions(+), 29 deletions(-)

diff --git a/app/test-pmd/5tswap.c b/app/test-pmd/5tswap.c
index f041a5e1d530..03225075716c 100644
--- a/app/test-pmd/5tswap.c
+++ b/app/test-pmd/5tswap.c
@@ -116,7 +116,7 @@ pkt_burst_5tuple_swap(struct fwd_stream *fs)
                                 nb_pkt_per_burst);
        inc_rx_burst_stats(fs, nb_rx);
        if (unlikely(nb_rx == 0))
-               return;
+               goto end;
 
        fs->rx_packets += nb_rx;
        txp = &ports[fs->tx_port];
@@ -182,7 +182,8 @@ pkt_burst_5tuple_swap(struct fwd_stream *fs)
                        rte_pktmbuf_free(pkts_burst[nb_tx]);
                } while (++nb_tx < nb_rx);
        }
-       get_end_cycles(fs, start_tsc);
+end:
+       get_end_cycles(fs, start_tsc, nb_rx);
 }
 
 static void
diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 1c2459851522..03e141221a56 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -868,7 +868,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
                                 nb_pkt_per_burst);
        inc_rx_burst_stats(fs, nb_rx);
        if (unlikely(nb_rx == 0))
-               return;
+               goto end;
 
        fs->rx_packets += nb_rx;
        rx_bad_ip_csum = 0;
@@ -1200,8 +1200,8 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
                        rte_pktmbuf_free(tx_pkts_burst[nb_tx]);
                } while (++nb_tx < nb_rx);
        }
-
-       get_end_cycles(fs, start_tsc);
+end:
+       get_end_cycles(fs, start_tsc, nb_rx);
 }
 
 static void
diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index fd6abc0f4124..7b2f0ffdf0f5 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -196,7 +196,7 @@ pkt_burst_flow_gen(struct fwd_stream *fs)
 
        RTE_PER_LCORE(_next_flow) = next_flow;
 
-       get_end_cycles(fs, start_tsc);
+       get_end_cycles(fs, start_tsc, nb_tx);
 }
 
 static int
diff --git a/app/test-pmd/icmpecho.c b/app/test-pmd/icmpecho.c
index 066f2a3ab79b..2fc9f96dc95f 100644
--- a/app/test-pmd/icmpecho.c
+++ b/app/test-pmd/icmpecho.c
@@ -303,7 +303,7 @@ reply_to_icmp_echo_rqsts(struct fwd_stream *fs)
                                 nb_pkt_per_burst);
        inc_rx_burst_stats(fs, nb_rx);
        if (unlikely(nb_rx == 0))
-               return;
+               goto end;
 
        fs->rx_packets += nb_rx;
        nb_replies = 0;
@@ -508,8 +508,8 @@ reply_to_icmp_echo_rqsts(struct fwd_stream *fs)
                        } while (++nb_tx < nb_replies);
                }
        }
-
-       get_end_cycles(fs, start_tsc);
+end:
+       get_end_cycles(fs, start_tsc, nb_rx);
 }
 
 static void
diff --git a/app/test-pmd/iofwd.c b/app/test-pmd/iofwd.c
index 8fafdec548ad..e5a2dbe20c69 100644
--- a/app/test-pmd/iofwd.c
+++ b/app/test-pmd/iofwd.c
@@ -59,7 +59,7 @@ pkt_burst_io_forward(struct fwd_stream *fs)
                        pkts_burst, nb_pkt_per_burst);
        inc_rx_burst_stats(fs, nb_rx);
        if (unlikely(nb_rx == 0))
-               return;
+               goto end;
        fs->rx_packets += nb_rx;
 
        nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
@@ -84,7 +84,8 @@ pkt_burst_io_forward(struct fwd_stream *fs)
                } while (++nb_tx < nb_rx);
        }
 
-       get_end_cycles(fs, start_tsc);
+end:
+       get_end_cycles(fs, start_tsc, nb_rx);
 }
 
 static void
diff --git a/app/test-pmd/macfwd.c b/app/test-pmd/macfwd.c
index beb220fbb462..9db623999970 100644
--- a/app/test-pmd/macfwd.c
+++ b/app/test-pmd/macfwd.c
@@ -65,7 +65,7 @@ pkt_burst_mac_forward(struct fwd_stream *fs)
                                 nb_pkt_per_burst);
        inc_rx_burst_stats(fs, nb_rx);
        if (unlikely(nb_rx == 0))
-               return;
+               goto end;
 
        fs->rx_packets += nb_rx;
        txp = &ports[fs->tx_port];
@@ -115,7 +115,8 @@ pkt_burst_mac_forward(struct fwd_stream *fs)
                } while (++nb_tx < nb_rx);
        }
 
-       get_end_cycles(fs, start_tsc);
+end:
+       get_end_cycles(fs, start_tsc, nb_rx);
 }
 
 static void
diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c
index 4f8deb338296..4db134ac1d91 100644
--- a/app/test-pmd/macswap.c
+++ b/app/test-pmd/macswap.c
@@ -66,7 +66,7 @@ pkt_burst_mac_swap(struct fwd_stream *fs)
                                 nb_pkt_per_burst);
        inc_rx_burst_stats(fs, nb_rx);
        if (unlikely(nb_rx == 0))
-               return;
+               goto end;
 
        fs->rx_packets += nb_rx;
        txp = &ports[fs->tx_port];
@@ -93,7 +93,8 @@ pkt_burst_mac_swap(struct fwd_stream *fs)
                        rte_pktmbuf_free(pkts_burst[nb_tx]);
                } while (++nb_tx < nb_rx);
        }
-       get_end_cycles(fs, start_tsc);
+end:
+       get_end_cycles(fs, start_tsc, nb_rx);
 }
 
 static void
diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c
index c65ec6f06a5c..290bdcda45f0 100644
--- a/app/test-pmd/noisy_vnf.c
+++ b/app/test-pmd/noisy_vnf.c
@@ -152,6 +152,9 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs)
        uint64_t delta_ms;
        bool needs_flush = false;
        uint64_t now;
+       uint64_t start_tsc = 0;
+
+       get_start_cycles(&start_tsc);
 
        nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue,
                        pkts_burst, nb_pkt_per_burst);
@@ -219,6 +222,7 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs)
                fs->fwd_dropped += drop_pkts(tmp_pkts, nb_deqd, sent);
                ncf->prev_time = rte_get_timer_cycles();
        }
+       get_end_cycles(fs, start_tsc, nb_rx + nb_tx);
 }
 
 #define NOISY_STRSIZE 256
diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c
index d528d4f34e60..519202339e16 100644
--- a/app/test-pmd/rxonly.c
+++ b/app/test-pmd/rxonly.c
@@ -58,13 +58,14 @@ pkt_burst_receive(struct fwd_stream *fs)
                                 nb_pkt_per_burst);
        inc_rx_burst_stats(fs, nb_rx);
        if (unlikely(nb_rx == 0))
-               return;
+               goto end;
 
        fs->rx_packets += nb_rx;
        for (i = 0; i < nb_rx; i++)
                rte_pktmbuf_free(pkts_burst[i]);
 
-       get_end_cycles(fs, start_tsc);
+end:
+       get_end_cycles(fs, start_tsc, nb_rx);
 }
 
 static void
diff --git a/app/test-pmd/shared_rxq_fwd.c b/app/test-pmd/shared_rxq_fwd.c
index 2e9047804b5b..395b73bfe52e 100644
--- a/app/test-pmd/shared_rxq_fwd.c
+++ b/app/test-pmd/shared_rxq_fwd.c
@@ -102,9 +102,10 @@ shared_rxq_fwd(struct fwd_stream *fs)
                                 nb_pkt_per_burst);
        inc_rx_burst_stats(fs, nb_rx);
        if (unlikely(nb_rx == 0))
-               return;
+               goto end;
        forward_shared_rxq(fs, nb_rx, pkts_burst);
-       get_end_cycles(fs, start_tsc);
+end:
+       get_end_cycles(fs, start_tsc, nb_rx);
 }
 
 static void
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 134d79a55547..450bc281fd69 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -2248,20 +2248,26 @@ static void
 run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
 {
        struct fwd_stream **fsm;
+       uint64_t tics_per_1sec;
+       uint64_t tics_current;
+       uint64_t tics;
        streamid_t nb_fs;
        streamid_t sm_id;
+       int interval, loop;
 #ifdef RTE_LIB_BITRATESTATS
-       uint64_t tics_per_1sec;
        uint64_t tics_datum;
-       uint64_t tics_current;
        uint16_t i, cnt_ports;
 
        cnt_ports = nb_ports;
        tics_datum = rte_rdtsc();
-       tics_per_1sec = rte_get_timer_hz();
 #endif
+       tics_per_1sec = rte_get_timer_hz();
+       tics = rte_rdtsc();
        fsm = &fwd_streams[fc->stream_idx];
        nb_fs = fc->stream_nb;
+       fc->lcore_id = rte_lcore_id();
+       interval = 0;
+       loop = 0;
        do {
                for (sm_id = 0; sm_id < nb_fs; sm_id++)
                        if (!fsm[sm_id]->disabled)
@@ -2284,8 +2290,58 @@ run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
                                latencystats_lcore_id == rte_lcore_id())
                        rte_latencystats_update();
 #endif
-
+               if (record_core_cycles && ++loop == 1024) {
+                       loop = 0;
+                       tics_current = rte_rdtsc();
+                       if (tics_current - tics >= tics_per_1sec * LCORE_CYCLES_INTERVAL_LEN) {
+                               for (sm_id = 0; sm_id < nb_fs; sm_id++) {
+                                       struct fwd_stream *fs = fsm[sm_id];
+                                       if (fs->disabled)
+                                               continue;
+                                       fc->cycles[interval].busy += fs->interval_cycles.busy;
+                                       fc->cycles[interval].total += fs->interval_cycles.total;
+                                       fs->interval_cycles.busy = 0;
+                                       fs->interval_cycles.total = 0;
+                               }
+                               interval += 1;
+                               if (interval == LCORE_CYCLES_INTERVAL_COUNT) {
+                                       memmove(&fc->cycles[0], &fc->cycles[1],
+                                               (LCORE_CYCLES_INTERVAL_COUNT - 1)
+                                               * sizeof(fc->cycles[0]));
+                                       interval = LCORE_CYCLES_INTERVAL_COUNT - 1;
+                               }
+                               fc->cycles[interval].busy = 0;
+                               fc->cycles[interval].total = 0;
+                               tics = tics_current;
+                       }
+               }
        } while (! fc->stopped);
+       memset(&fc->cycles, 0, sizeof(fc->cycles));
+}
+
+static int
+lcore_busy_percent_callback(unsigned int lcore_id)
+{
+       uint64_t busy_cycles, total_cycles;
+       struct fwd_lcore *fc;
+       int i, c;
+
+       for (c = 0; c < nb_lcores; c++) {
+               fc = fwd_lcores[c];
+               if (fc->lcore_id != lcore_id)
+                       continue;
+               busy_cycles = total_cycles = 0;
+
+               for (i = 0; i < LCORE_CYCLES_INTERVAL_COUNT; i++) {
+                       busy_cycles += fc->cycles[i].busy;
+                       total_cycles += fc->cycles[i].total;
+               }
+               if (total_cycles == 0)
+                       return -1;
+               return 100 * busy_cycles / total_cycles;
+       }
+
+       return -1;
 }
 
 static int
@@ -4522,6 +4578,11 @@ main(int argc, char** argv)
                rte_stats_bitrate_reg(bitrate_data);
        }
 #endif
+
+       if (record_core_cycles)
+               rte_lcore_register_busy_percent_cb(lcore_busy_percent_callback,
+                       LCORE_CYCLES_INTERVAL_LEN * LCORE_CYCLES_INTERVAL_COUNT);
+
 #ifdef RTE_LIB_CMDLINE
        if (init_cmdline() != 0)
                rte_exit(EXIT_FAILURE,
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 7d24d25970d2..684a06919986 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -145,6 +145,14 @@ extern const struct rss_type_info rss_type_table[];
  */
 extern char dynf_names[64][RTE_MBUF_DYN_NAMESIZE];
 
+/**
+ * Used with --record-core-cycles.
+ */
+struct lcore_cycles {
+       uint64_t busy;
+       uint64_t total;
+};
+
 /**
  * The data structure associated with a forwarding stream between a receive
  * port/queue and a transmit port/queue.
@@ -175,6 +183,7 @@ struct fwd_stream {
        unsigned int gro_times; /**< GRO operation times */
 #endif
        uint64_t     core_cycles; /**< used for RX and TX processing */
+       struct lcore_cycles interval_cycles;
        struct pkt_burst_stats rx_burst_stats;
        struct pkt_burst_stats tx_burst_stats;
        struct fwd_lcore *lcore; /**< Lcore being scheduled. */
@@ -341,6 +350,9 @@ struct rte_port {
        struct xstat_display_info xstats_info;
 };
 
+#define LCORE_CYCLES_INTERVAL_COUNT 6
+#define LCORE_CYCLES_INTERVAL_LEN 10
+
 /**
  * The data structure associated with each forwarding logical core.
  * The logical cores are internally numbered by a core index from 0 to
@@ -360,6 +372,8 @@ struct fwd_lcore {
        streamid_t stream_nb;    /**< number of streams in "fwd_streams" */
        lcoreid_t  cpuid_idx;    /**< index of logical core in CPU id table */
        volatile char stopped;   /**< stop forwarding when set */
+       unsigned int lcore_id;   /**< return value of rte_lcore_id() */
+       struct lcore_cycles cycles[LCORE_CYCLES_INTERVAL_COUNT];  /**< busy percent stats */
 };
 
 /*
@@ -836,10 +850,15 @@ get_start_cycles(uint64_t *start_tsc)
 }
 
 static inline void
-get_end_cycles(struct fwd_stream *fs, uint64_t start_tsc)
+get_end_cycles(struct fwd_stream *fs, uint64_t start_tsc, uint64_t nb_packets)
 {
-       if (record_core_cycles)
-               fs->core_cycles += rte_rdtsc() - start_tsc;
+       if (record_core_cycles) {
+               uint64_t cycles = rte_rdtsc() - start_tsc;
+               fs->core_cycles += cycles;
+               fs->interval_cycles.total += cycles;
+               if (nb_packets > 0)
+                       fs->interval_cycles.busy += cycles;
+       }
 }
 
 static inline void
diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 021624952daa..ad37626ff63c 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -331,7 +331,7 @@ pkt_burst_transmit(struct fwd_stream *fs)
        struct rte_mbuf *pkt;
        struct rte_mempool *mbp;
        struct rte_ether_hdr eth_hdr;
-       uint16_t nb_tx;
+       uint16_t nb_tx = 0;
        uint16_t nb_pkt;
        uint16_t vlan_tci, vlan_tci_outer;
        uint32_t retry;
@@ -392,7 +392,7 @@ pkt_burst_transmit(struct fwd_stream *fs)
        }
 
        if (nb_pkt == 0)
-               return;
+               goto end;
 
        nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
 
@@ -426,7 +426,8 @@ pkt_burst_transmit(struct fwd_stream *fs)
                } while (++nb_tx < nb_pkt);
        }
 
-       get_end_cycles(fs, start_tsc);
+end:
+       get_end_cycles(fs, start_tsc, nb_tx);
 }
 
 static int
-- 
2.38.1

Reply via email to