Enable reporting of driver xstats and inference end-to-end
latency and throughput in the mldev inference tests. Reporting
of stats can be enabled using the "--stats" option.
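
An illustrative invocation (the EAL arguments, device identifier and
filelist entries below are placeholders; the actual values depend on
the target platform and model):

    ./dpdk-test-mldev -a <ML_DEVICE_ID> -- \
            --test=inference_ordered \
            --filelist model.bin,input.bin,output.bin \
            --repetitions=100 --stats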

Signed-off-by: Srikanth Yalavarthi <syalavar...@marvell.com>
Acked-by: Anup Prabhu <apra...@marvell.com>
---
 app/test-mldev/ml_options.c                |  10 +-
 app/test-mldev/ml_options.h                |   2 +
 app/test-mldev/test_inference_common.c     | 140 +++++++++++++++++++++
 app/test-mldev/test_inference_common.h     |   8 ++
 app/test-mldev/test_inference_interleave.c |   4 +
 app/test-mldev/test_inference_ordered.c    |   1 +
 doc/guides/tools/testmldev.rst             |   7 ++
 7 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/app/test-mldev/ml_options.c b/app/test-mldev/ml_options.c
index da30796a6b..2efcc3532c 100644
--- a/app/test-mldev/ml_options.c
+++ b/app/test-mldev/ml_options.c
@@ -30,6 +30,7 @@ ml_options_default(struct ml_options *opt)
        opt->queue_size = 1;
        opt->batches = 0;
        opt->tolerance = 0.0;
+       opt->stats = false;
        opt->debug = false;
 }
 
@@ -216,7 +217,8 @@ ml_dump_test_options(const char *testname)
                       "\t\t--queue_pairs      : number of queue pairs to 
create\n"
                       "\t\t--queue_size       : size fo queue-pair\n"
                       "\t\t--batches          : number of batches of input\n"
-                      "\t\t--tolerance        : maximum tolerance (%%) for output validation\n");
+                      "\t\t--tolerance        : maximum tolerance (%%) for output validation\n"
+                      "\t\t--stats            : enable reporting performance statistics\n");
                printf("\n");
        }
 }
@@ -248,6 +250,7 @@ static struct option lgopts[] = {
        {ML_QUEUE_SIZE, 1, 0, 0},
        {ML_BATCHES, 1, 0, 0},
        {ML_TOLERANCE, 1, 0, 0},
+       {ML_STATS, 0, 0, 0},
        {ML_DEBUG, 0, 0, 0},
        {ML_HELP, 0, 0, 0},
        {NULL, 0, 0, 0}};
@@ -290,6 +293,11 @@ ml_options_parse(struct ml_options *opt, int argc, char **argv)
        while ((opts = getopt_long(argc, argv, "", lgopts, &opt_idx)) != EOF) {
                switch (opts) {
                case 0: /* parse long options */
+                       if (!strcmp(lgopts[opt_idx].name, "stats")) {
+                               opt->stats = true;
+                               break;
+                       }
+
                        if (!strcmp(lgopts[opt_idx].name, "debug")) {
                                opt->debug = true;
                                break;
diff --git a/app/test-mldev/ml_options.h b/app/test-mldev/ml_options.h
index 7f3db29656..beb0fe69c6 100644
--- a/app/test-mldev/ml_options.h
+++ b/app/test-mldev/ml_options.h
@@ -23,6 +23,7 @@
 #define ML_QUEUE_SIZE  ("queue_size")
 #define ML_BATCHES     ("batches")
 #define ML_TOLERANCE   ("tolerance")
+#define ML_STATS       ("stats")
 #define ML_DEBUG       ("debug")
 #define ML_HELP               ("help")
 
@@ -45,6 +46,7 @@ struct ml_options {
        uint16_t queue_size;
        uint16_t batches;
        float tolerance;
+       bool stats;
        bool debug;
 };
 
diff --git a/app/test-mldev/test_inference_common.c b/app/test-mldev/test_inference_common.c
index b605c1f5d3..e85f32be60 100644
--- a/app/test-mldev/test_inference_common.c
+++ b/app/test-mldev/test_inference_common.c
@@ -6,6 +6,7 @@
 #include <unistd.h>
 
 #include <rte_common.h>
+#include <rte_cycles.h>
 #include <rte_hash_crc.h>
 #include <rte_launch.h>
 #include <rte_lcore.h>
@@ -37,6 +38,17 @@
                }                                                                      \
        } while (0)
 
+static void
+print_line(uint16_t len)
+{
+       uint16_t i;
+
+       for (i = 0; i < len; i++)
+               printf("-");
+
+       printf("\n");
+}
+
 /* Enqueue inference requests with burst size equal to 1 */
 static int
 ml_enqueue_single(void *arg)
@@ -46,6 +58,7 @@ ml_enqueue_single(void *arg)
        struct rte_ml_op *op = NULL;
        struct ml_core_args *args;
        uint64_t model_enq = 0;
+       uint64_t start_cycle;
        uint32_t burst_enq;
        uint32_t lcore_id;
        uint16_t fid;
@@ -53,6 +66,7 @@ ml_enqueue_single(void *arg)
 
        lcore_id = rte_lcore_id();
        args = &t->args[lcore_id];
+       args->start_cycles = 0;
        model_enq = 0;
 
        if (args->nb_reqs == 0)
@@ -88,10 +102,12 @@ ml_enqueue_single(void *arg)
        req->fid = fid;
 
 enqueue_req:
+       start_cycle = rte_get_tsc_cycles();
        burst_enq = rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
        if (burst_enq == 0)
                goto enqueue_req;
 
+       args->start_cycles += start_cycle;
        fid++;
        if (likely(fid <= args->end_fid))
                goto next_model;
@@ -115,10 +131,12 @@ ml_dequeue_single(void *arg)
        uint64_t total_deq = 0;
        uint8_t nb_filelist;
        uint32_t burst_deq;
+       uint64_t end_cycle;
        uint32_t lcore_id;
 
        lcore_id = rte_lcore_id();
        args = &t->args[lcore_id];
+       args->end_cycles = 0;
        nb_filelist = args->end_fid - args->start_fid + 1;
 
        if (args->nb_reqs == 0)
@@ -126,9 +144,11 @@ ml_dequeue_single(void *arg)
 
 dequeue_req:
        burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, &op, 1);
+       end_cycle = rte_get_tsc_cycles();
 
        if (likely(burst_deq == 1)) {
                total_deq += burst_deq;
+               args->end_cycles += end_cycle;
                if (unlikely(op->status == RTE_ML_OP_STATUS_ERROR)) {
                        rte_ml_op_error_get(t->cmn.opt->dev_id, op, &error);
                        ml_err("error_code = 0x%" PRIx64 ", error_message = 
%s\n", error.errcode,
@@ -152,6 +172,7 @@ ml_enqueue_burst(void *arg)
 {
        struct test_inference *t = ml_test_priv((struct ml_test *)arg);
        struct ml_core_args *args;
+       uint64_t start_cycle;
        uint16_t ops_count;
        uint64_t model_enq;
        uint16_t burst_enq;
@@ -164,6 +185,7 @@ ml_enqueue_burst(void *arg)
 
        lcore_id = rte_lcore_id();
        args = &t->args[lcore_id];
+       args->start_cycles = 0;
        model_enq = 0;
 
        if (args->nb_reqs == 0)
@@ -205,8 +227,10 @@ ml_enqueue_burst(void *arg)
        pending = ops_count;
 
 enqueue_reqs:
+       start_cycle = rte_get_tsc_cycles();
        burst_enq =
                rte_ml_enqueue_burst(t->cmn.opt->dev_id, args->qp_id, &args->enq_ops[idx], pending);
+       args->start_cycles += burst_enq * start_cycle;
        pending = pending - burst_enq;
 
        if (pending > 0) {
@@ -236,11 +260,13 @@ ml_dequeue_burst(void *arg)
        uint64_t total_deq = 0;
        uint16_t burst_deq = 0;
        uint8_t nb_filelist;
+       uint64_t end_cycle;
        uint32_t lcore_id;
        uint32_t i;
 
        lcore_id = rte_lcore_id();
        args = &t->args[lcore_id];
+       args->end_cycles = 0;
        nb_filelist = args->end_fid - args->start_fid + 1;
 
        if (args->nb_reqs == 0)
@@ -249,9 +275,11 @@ ml_dequeue_burst(void *arg)
 dequeue_burst:
        burst_deq = rte_ml_dequeue_burst(t->cmn.opt->dev_id, args->qp_id, args->deq_ops,
                                         t->cmn.opt->burst_size);
+       end_cycle = rte_get_tsc_cycles();
 
        if (likely(burst_deq > 0)) {
                total_deq += burst_deq;
+               args->end_cycles += burst_deq * end_cycle;
 
                for (i = 0; i < burst_deq; i++) {
                        if (unlikely(args->deq_ops[i]->status == RTE_ML_OP_STATUS_ERROR)) {
@@ -381,6 +409,7 @@ test_inference_opt_dump(struct ml_options *opt)
        ml_dump("queue_pairs", "%u", opt->queue_pairs);
        ml_dump("queue_size", "%u", opt->queue_size);
        ml_dump("tolerance", "%-7.3f", opt->tolerance);
+       ml_dump("stats", "%s", (opt->stats ? "true" : "false"));
 
        if (opt->batches == 0)
                ml_dump("batches", "%u (default)", opt->batches);
@@ -454,6 +483,11 @@ test_inference_setup(struct ml_test *test, struct ml_options *opt)
                        RTE_CACHE_LINE_SIZE, opt->socket_id);
        }
 
+       for (i = 0; i < RTE_MAX_LCORE; i++) {
+               t->args[i].start_cycles = 0;
+               t->args[i].end_cycles = 0;
+       }
+
        return 0;
 
 error:
@@ -986,3 +1020,109 @@ ml_inference_launch_cores(struct ml_test *test, struct ml_options *opt, uint16_t
 
        return 0;
 }
+
+int
+ml_inference_stats_get(struct ml_test *test, struct ml_options *opt)
+{
+       struct test_inference *t = ml_test_priv(test);
+       uint64_t total_cycles = 0;
+       uint32_t nb_filelist;
+       uint64_t throughput;
+       uint64_t avg_e2e;
+       uint32_t qp_id;
+       uint64_t freq;
+       int ret;
+       int i;
+
+       if (!opt->stats)
+               return 0;
+
+       /* get xstats size */
+       t->xstats_size = rte_ml_dev_xstats_names_get(opt->dev_id, NULL, 0);
+       if (t->xstats_size >= 0) {
+               /* allocate for xstats_map and values */
+               t->xstats_map = rte_malloc(
+                       "ml_xstats_map", t->xstats_size * sizeof(struct rte_ml_dev_xstats_map), 0);
+               if (t->xstats_map == NULL) {
+                       ret = -ENOMEM;
+                       goto error;
+               }
+
+               t->xstats_values =
+                       rte_malloc("ml_xstats_values", t->xstats_size * sizeof(uint64_t), 0);
+               if (t->xstats_values == NULL) {
+                       ret = -ENOMEM;
+                       goto error;
+               }
+
+               ret = rte_ml_dev_xstats_names_get(opt->dev_id, t->xstats_map, t->xstats_size);
+               if (ret != t->xstats_size) {
+                       printf("Unable to get xstats names, ret = %d\n", ret);
+                       ret = -1;
+                       goto error;
+               }
+
+               for (i = 0; i < t->xstats_size; i++)
+                       rte_ml_dev_xstats_get(opt->dev_id, &t->xstats_map[i].id,
+                                             &t->xstats_values[i], 1);
+       }
+
+       /* print xstats */
+       printf("\n");
+       print_line(80);
+       printf(" ML Device Extended Statistics\n");
+       print_line(80);
+       for (i = 0; i < t->xstats_size; i++)
+               printf(" %-64s = %" PRIu64 "\n", t->xstats_map[i].name, t->xstats_values[i]);
+       print_line(80);
+
+       /* release buffers */
+       if (t->xstats_map)
+               rte_free(t->xstats_map);
+
+       if (t->xstats_values)
+               rte_free(t->xstats_values);
+
+       /* print end-to-end stats */
+       freq = rte_get_tsc_hz();
+       for (qp_id = 0; qp_id < RTE_MAX_LCORE; qp_id++)
+               total_cycles += t->args[qp_id].end_cycles - t->args[qp_id].start_cycles;
+       avg_e2e = total_cycles / opt->repetitions;
+
+       if (freq == 0) {
+               avg_e2e = total_cycles / opt->repetitions;
+               printf(" %-64s = %" PRIu64 "\n", "Average End-to-End Latency (cycles)", avg_e2e);
+       } else {
+               avg_e2e = (total_cycles * NS_PER_S) / (opt->repetitions * freq);
+               printf(" %-64s = %" PRIu64 "\n", "Average End-to-End Latency (ns)", avg_e2e);
+       }
+
+       /* print inference throughput */
+       if (strcmp(opt->test_name, "inference_ordered") == 0)
+               nb_filelist = 1;
+       else
+               nb_filelist = opt->nb_filelist;
+
+       if (freq == 0) {
+               throughput = (nb_filelist * t->cmn.opt->repetitions * 1000000) / total_cycles;
+               printf(" %-64s = %" PRIu64 "\n", "Average Throughput (inferences / million cycles)",
+                      throughput);
+       } else {
+               throughput = (nb_filelist * t->cmn.opt->repetitions * freq) / total_cycles;
+               printf(" %-64s = %" PRIu64 "\n", "Average Throughput (inferences / second)",
+                      throughput);
+       }
+
+       print_line(80);
+
+       return 0;
+
+error:
+       if (t->xstats_map)
+               rte_free(t->xstats_map);
+
+       if (t->xstats_values)
+               rte_free(t->xstats_values);
+
+       return ret;
+}
diff --git a/app/test-mldev/test_inference_common.h b/app/test-mldev/test_inference_common.h
index 2e4889e1f7..0b4fba78e0 100644
--- a/app/test-mldev/test_inference_common.h
+++ b/app/test-mldev/test_inference_common.h
@@ -27,6 +27,9 @@ struct ml_core_args {
        struct rte_ml_op **enq_ops;
        struct rte_ml_op **deq_ops;
        struct ml_request **reqs;
+
+       uint64_t start_cycles;
+       uint64_t end_cycles;
 };
 
 struct test_inference {
@@ -46,6 +49,10 @@ struct test_inference {
 
        struct ml_core_args args[RTE_MAX_LCORE];
        uint64_t error_count[RTE_MAX_LCORE];
+
+       struct rte_ml_dev_xstats_map *xstats_map;
+       uint64_t *xstats_values;
+       int xstats_size;
 } __rte_cache_aligned;
 
 bool test_inference_cap_check(struct ml_options *opt);
@@ -63,5 +70,6 @@ void ml_inference_mem_destroy(struct ml_test *test, struct ml_options *opt);
 int ml_inference_result(struct ml_test *test, struct ml_options *opt, uint16_t fid);
 int ml_inference_launch_cores(struct ml_test *test, struct ml_options *opt, uint16_t start_fid,
                              uint16_t end_fid);
+int ml_inference_stats_get(struct ml_test *test, struct ml_options *opt);
 
 #endif /* _ML_TEST_INFERENCE_COMMON_ */
diff --git a/app/test-mldev/test_inference_interleave.c b/app/test-mldev/test_inference_interleave.c
index 9cf4cfa197..bd2c286737 100644
--- a/app/test-mldev/test_inference_interleave.c
+++ b/app/test-mldev/test_inference_interleave.c
@@ -56,7 +56,11 @@ test_inference_interleave_driver(struct ml_test *test, struct ml_options *opt)
                        goto error;
 
                ml_inference_iomem_destroy(test, opt, fid);
+       }
+
+       ml_inference_stats_get(test, opt);
 
+       for (fid = 0; fid < opt->nb_filelist; fid++) {
                ret = ml_model_stop(test, opt, &t->model[fid], fid);
                if (ret != 0)
                        goto error;
diff --git a/app/test-mldev/test_inference_ordered.c b/app/test-mldev/test_inference_ordered.c
index 1cd91dc3d3..8992358936 100644
--- a/app/test-mldev/test_inference_ordered.c
+++ b/app/test-mldev/test_inference_ordered.c
@@ -54,6 +54,7 @@ test_inference_ordered_driver(struct ml_test *test, struct ml_options *opt)
                goto error;
 
        ml_inference_iomem_destroy(test, opt, fid);
+       ml_inference_stats_get(test, opt);
 
        /* stop model */
        ret = ml_model_stop(test, opt, &t->model[fid], fid);
diff --git a/doc/guides/tools/testmldev.rst b/doc/guides/tools/testmldev.rst
index 25dc878f25..e51d780cd5 100644
--- a/doc/guides/tools/testmldev.rst
+++ b/doc/guides/tools/testmldev.rst
@@ -116,6 +116,10 @@ The following are the command-line options supported by the test application.
         Set the tolerance value in percentage to be used for output validation. Default value
         is `0`.
 
+* ``--stats``
+
+        Enable reporting of device extended stats, average end-to-end latency and throughput.
+
 * ``--debug``
 
         Enable the tests to run in debug mode.
@@ -279,6 +283,7 @@ Supported command line options for inference tests are following::
         --queue_size
         --batches
         --tolerance
+        --stats
 
 
 List of files to be used for the inference tests can be specified through the option
@@ -300,6 +305,8 @@ inference output and reference output are compared. When the tolerance is non-ze
 comparison of output is performed. Validation is considered as successful only when all the
 elements of the output tensor are with in the tolerance range specified.
 
+Enabling ``--stats`` prints the extended stats supported by the driver.
+
 .. Note::
 
     * The ``--filelist <file_list>`` is a mandatory option for running inference tests.
-- 
2.17.1
