[PATCH v2 33/34] ml/cnxk: enable fast-path ops for TVM models

Srikanth Yalavarthi Wed, 20 Sep 2023 00:29:40 -0700

From: Anup Prabhu <apra...@marvell.com>

Enable fast-path ops support for TVM models. Models of the
Hybrid and LLVM sub-types use TVMDP library function calls
to execute inference operations.


For TVM MRVL model subtypes that have a single MRVL layer,
the inference requests are directly enqueued to hardware
by the driver.

Signed-off-by: Anup Prabhu <apra...@marvell.com>
Signed-off-by: Srikanth Yalavarthi <syalavar...@marvell.com>
---
 drivers/ml/cnxk/cn10k_ml_ops.c   |   4 -
 drivers/ml/cnxk/cnxk_ml_io.h     |   6 ++
 drivers/ml/cnxk/cnxk_ml_ops.c    |   4 +
 drivers/ml/cnxk/cnxk_ml_ops.h    |   9 +++
 drivers/ml/cnxk/mvtvm_ml_model.c |  20 +++++
 drivers/ml/cnxk/mvtvm_ml_model.h |   6 ++
 drivers/ml/cnxk/mvtvm_ml_ops.c   | 124 +++++++++++++++++++++++++++++++
 drivers/ml/cnxk/mvtvm_ml_ops.h   |  43 +++++++++++
 8 files changed, 212 insertions(+), 4 deletions(-)

diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index 140f7a343f..c1353fb0c8 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -287,10 +287,6 @@ cn10k_ml_dev_configure(struct cnxk_ml_dev *cnxk_mldev, const struct rte_ml_dev_c
        else
                cn10k_mldev->ml_jcmdq_enqueue = roc_ml_jcmdq_enqueue_lf;
 
-       cnxk_mldev->mldev->enqueue_burst = cnxk_ml_enqueue_burst;
-       cnxk_mldev->mldev->dequeue_burst = cnxk_ml_dequeue_burst;
-       cnxk_mldev->mldev->op_error_get = cn10k_ml_op_error_get;
-
        return 0;
 }
 
diff --git a/drivers/ml/cnxk/cnxk_ml_io.h b/drivers/ml/cnxk/cnxk_ml_io.h
index 5de166c252..6d5d25a7c9 100644
--- a/drivers/ml/cnxk/cnxk_ml_io.h
+++ b/drivers/ml/cnxk/cnxk_ml_io.h
@@ -47,6 +47,12 @@ struct cnxk_ml_io {
 
        /* Scale */
        float scale;
+
+       /* Dequantized offset */
+       uint32_t offset_d;
+
+       /* Quantized offset */
+       uint32_t offset_q;
 };
 
 /* Model / Layer IO structure */
diff --git a/drivers/ml/cnxk/cnxk_ml_ops.c b/drivers/ml/cnxk/cnxk_ml_ops.c
index f281e6070f..274d152b81 100644
--- a/drivers/ml/cnxk/cnxk_ml_ops.c
+++ b/drivers/ml/cnxk/cnxk_ml_ops.c
@@ -770,6 +770,10 @@ cnxk_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *co
        cnxk_mldev->max_nb_layers =
                
cnxk_mldev->cn10k_mldev.fw.req->cn10k_req.jd.fw_load.cap.s.max_models;
 
+       cnxk_mldev->mldev->enqueue_burst = cnxk_ml_enqueue_burst;
+       cnxk_mldev->mldev->dequeue_burst = cnxk_ml_dequeue_burst;
+       cnxk_mldev->mldev->op_error_get = cn10k_ml_op_error_get;
+
        /* Allocate and initialize index_map */
        if (cnxk_mldev->index_map == NULL) {
                cnxk_mldev->index_map =
diff --git a/drivers/ml/cnxk/cnxk_ml_ops.h b/drivers/ml/cnxk/cnxk_ml_ops.h
index 2575f4c6e1..62e2b17e35 100644
--- a/drivers/ml/cnxk/cnxk_ml_ops.h
+++ b/drivers/ml/cnxk/cnxk_ml_ops.h
@@ -12,12 +12,21 @@
 
 #include "cn10k_ml_ops.h"
 
+#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM
+#include "mvtvm_ml_ops.h"
+#endif
+
 /* Request structure */
 struct cnxk_ml_req {
        /* Device specific request */
        union {
                /* CN10K */
                struct cn10k_ml_req cn10k_req;
+
+#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM
+               /* MVTVM */
+               struct mvtvm_ml_req mvtvm_req;
+#endif
        };
 
        /* Address of status field */
diff --git a/drivers/ml/cnxk/mvtvm_ml_model.c b/drivers/ml/cnxk/mvtvm_ml_model.c
index 7086c7a407..8af84b6972 100644
--- a/drivers/ml/cnxk/mvtvm_ml_model.c
+++ b/drivers/ml/cnxk/mvtvm_ml_model.c
@@ -136,6 +136,16 @@ mvtvm_ml_model_io_info_update(struct cnxk_ml_model *model)
                model->mvtvm.info.total_input_sz_d += 
model->mvtvm.info.input[i].sz_d;
                model->mvtvm.info.total_input_sz_q += 
model->mvtvm.info.input[i].sz_q;
 
+               model->mvtvm.info.input[i].offset_d = 
model->mvtvm.info.total_input_sz_d;
+               model->mvtvm.info.input[i].offset_q = 
model->mvtvm.info.total_input_sz_q;
+
+               model->mvtvm.input_tensor[i].device = metadata->input[i].device;
+               model->mvtvm.input_tensor[i].ndim = metadata->input[i].ndim;
+               model->mvtvm.input_tensor[i].dtype = 
metadata->input[i].datatype;
+               model->mvtvm.input_tensor[i].shape = metadata->input[i].shape;
+               model->mvtvm.input_tensor[i].strides = NULL;
+               model->mvtvm.input_tensor[i].byte_offset = 
model->mvtvm.info.input[i].offset_q;
+
                plt_ml_dbg("model_id = %u, input[%u] - sz_d = %u sz_q = %u", 
model->model_id, i,
                           model->mvtvm.info.input[i].sz_d, 
model->mvtvm.info.input[i].sz_q);
        }
@@ -169,6 +179,16 @@ mvtvm_ml_model_io_info_update(struct cnxk_ml_model *model)
                model->mvtvm.info.total_output_sz_d += 
model->mvtvm.info.output[i].sz_d;
                model->mvtvm.info.total_output_sz_q += 
model->mvtvm.info.output[i].sz_q;
 
+               model->mvtvm.info.output[i].offset_d = 
model->mvtvm.info.total_output_sz_d;
+               model->mvtvm.info.output[i].offset_q = 
model->mvtvm.info.total_output_sz_q;
+
+               model->mvtvm.output_tensor[i].device = 
metadata->output[i].device;
+               model->mvtvm.output_tensor[i].ndim = metadata->output[i].ndim;
+               model->mvtvm.output_tensor[i].dtype = 
metadata->output[i].datatype;
+               model->mvtvm.output_tensor[i].shape = metadata->output[i].shape;
+               model->mvtvm.output_tensor[i].strides = NULL;
+               model->mvtvm.output_tensor[i].byte_offset = 
model->mvtvm.info.output[i].offset_q;
+
                plt_ml_dbg("model_id = %u, output[%u] - sz_d = %u sz_q = %u", 
model->model_id, i,
                           model->mvtvm.info.output[i].sz_d, 
model->mvtvm.info.output[i].sz_q);
        }
diff --git a/drivers/ml/cnxk/mvtvm_ml_model.h b/drivers/ml/cnxk/mvtvm_ml_model.h
index 57a6ce0bb1..08e101bbe7 100644
--- a/drivers/ml/cnxk/mvtvm_ml_model.h
+++ b/drivers/ml/cnxk/mvtvm_ml_model.h
@@ -71,6 +71,12 @@ struct mvtvm_ml_model_data {
 
        /* Stats for burst ops */
        struct mvtvm_ml_model_xstats *burst_xstats;
+
+       /* Input Tensor */
+       DLTensor input_tensor[ML_CNXK_MODEL_MAX_INPUT_OUTPUT];
+
+       /* Output Tensor */
+       DLTensor output_tensor[ML_CNXK_MODEL_MAX_INPUT_OUTPUT];
 };
 
 int mvtvm_ml_model_blob_parse(struct rte_ml_model_params *params,
diff --git a/drivers/ml/cnxk/mvtvm_ml_ops.c b/drivers/ml/cnxk/mvtvm_ml_ops.c
index 5292ac97fe..2baac8f72f 100644
--- a/drivers/ml/cnxk/mvtvm_ml_ops.c
+++ b/drivers/ml/cnxk/mvtvm_ml_ops.c
@@ -21,6 +21,12 @@
 /* ML model macros */
 #define MVTVM_ML_MODEL_MEMZONE_NAME "ml_mvtvm_model_mz"
 
+__rte_hot static void
+mvtvm_ml_set_poll_addr(struct cnxk_ml_req *req)
+{
+       req->status = &req->mvtvm_req.status;
+}
+
 int
 mvtvm_ml_dev_configure(struct cnxk_ml_dev *cnxk_mldev, const struct 
rte_ml_dev_config *conf)
 {
@@ -172,6 +178,7 @@ mvtvm_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params *
                callback->tvmrt_free = cn10k_ml_free;
                callback->tvmrt_quantize = mvtvm_ml_io_quantize;
                callback->tvmrt_dequantize = mvtvm_ml_io_dequantize;
+               callback->tvmrt_inference = cn10k_ml_inference_sync;
        } else {
                callback = NULL;
        }
@@ -215,6 +222,19 @@ mvtvm_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params *
                model->mvtvm.burst_xstats[qp_id].dequeued_count = 0;
        }
 
+       /* Set model specific fast path functions */
+       if (model->subtype == ML_CNXK_MODEL_SUBTYPE_TVM_MRVL) {
+               model->enqueue_single = cn10k_ml_enqueue_single;
+               model->result_update = cn10k_ml_result_update;
+               model->set_error_code = cn10k_ml_set_error_code;
+               model->set_poll_addr = cn10k_ml_set_poll_addr;
+       } else {
+               model->enqueue_single = mvtvm_ml_enqueue_single;
+               model->result_update = mvtvm_ml_result_update;
+               model->set_error_code = mvtvm_ml_set_error_code;
+               model->set_poll_addr = mvtvm_ml_set_poll_addr;
+       }
+
        return 0;
 
 error:
@@ -425,3 +445,107 @@ mvtvm_ml_io_dequantize(void *device, uint16_t model_id, const char *layer_name,
 
        return 0;
 }
+
+static int
+mvtvm_ml_model_run(struct cnxk_ml_model *model, struct rte_ml_op *op, struct 
cnxk_ml_req *req)
+{
+       uint8_t i;
+
+       rte_memcpy(req->mvtvm_req.input_tensor, model->mvtvm.input_tensor,
+                  model->mvtvm.metadata.model.num_input * sizeof(DLTensor));
+       for (i = 0; i < model->mvtvm.metadata.model.num_input; i++) {
+               req->mvtvm_req.input_tensor[i].data = op->input[i]->addr;
+               req->mvtvm_req.input_tensor[i].byte_offset = 0;
+       }
+
+       rte_memcpy(req->mvtvm_req.output_tensor, model->mvtvm.output_tensor,
+                  model->mvtvm.metadata.model.num_output * sizeof(DLTensor));
+       for (i = 0; i < model->mvtvm.metadata.model.num_output; i++) {
+               req->mvtvm_req.output_tensor[i].data = op->output[i]->addr;
+               req->mvtvm_req.output_tensor[i].byte_offset = 0;
+       }
+
+       tvmdp_model_run(model->model_id, model->mvtvm.metadata.model.num_input,
+                       req->mvtvm_req.input_tensor, 
model->mvtvm.metadata.model.num_output,
+                       req->mvtvm_req.output_tensor, &req->mvtvm_req.result,
+                       &req->mvtvm_req.status);
+
+       plt_write64(ML_CNXK_POLL_JOB_FINISH, req->status);
+
+       return 0;
+}
+
+__rte_hot void
+mvtvm_ml_set_error_code(struct cnxk_ml_req *req, uint64_t etype, uint64_t 
stype)
+{
+       RTE_SET_USED(stype);
+
+       req->mvtvm_req.result.error_code = etype;
+}
+
+__rte_hot bool
+mvtvm_ml_enqueue_single(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op, 
uint16_t layer_id,
+                       struct cnxk_ml_qp *qp, uint64_t head)
+{
+       struct cnxk_ml_model *model;
+       struct cnxk_ml_queue *queue;
+       struct cnxk_ml_req *req;
+
+       RTE_SET_USED(layer_id);
+
+       queue = &qp->queue;
+       req = &queue->reqs[head];
+       model = cnxk_mldev->mldev->data->models[op->model_id];
+
+       model->set_poll_addr(req);
+       memset(&req->mvtvm_req.result, 0, sizeof(struct mvtvm_ml_result));
+       req->mvtvm_req.result.error_code = 0x0;
+       req->mvtvm_req.result.user_ptr = op->user_ptr;
+
+       cnxk_ml_set_poll_ptr(req);
+       mvtvm_ml_model_run(model, op, req);
+       req->timeout = plt_tsc_cycles() + queue->wait_cycles;
+       req->op = op;
+
+       return true;
+}
+
+__rte_hot void
+mvtvm_ml_result_update(struct cnxk_ml_dev *cnxk_mldev, int qp_id, void 
*request)
+{
+       struct mvtvm_ml_model_xstats *xstats;
+       struct mvtvm_ml_result *result;
+       struct cnxk_ml_model *model;
+       struct cnxk_ml_req *req;
+       uint64_t tvm_rt_latency;
+       struct cnxk_ml_qp *qp;
+       struct rte_ml_op *op;
+
+       req = (struct cnxk_ml_req *)request;
+       result = &req->mvtvm_req.result;
+       op = req->op;
+       qp = cnxk_mldev->mldev->data->queue_pairs[qp_id];
+       op->impl_opaque = result->error_code;
+
+       if (likely(result->error_code == 0)) {
+               qp->stats.dequeued_count++;
+               op->status = RTE_ML_OP_STATUS_SUCCESS;
+
+               model = cnxk_mldev->mldev->data->models[op->model_id];
+               xstats = &model->mvtvm.burst_xstats[qp_id];
+
+               if (unlikely(xstats->dequeued_count == 
xstats->tvm_rt_reset_count)) {
+                       xstats->tvm_rt_latency_min = UINT64_MAX;
+                       xstats->tvm_rt_latency_max = 0;
+               }
+               tvm_rt_latency = result->stats.end_ns - result->stats.start_ns;
+               xstats->tvm_rt_latency = tvm_rt_latency;
+               xstats->tvm_rt_latency_tot += tvm_rt_latency;
+               xstats->tvm_rt_latency_min = 
RTE_MIN(xstats->tvm_rt_latency_min, tvm_rt_latency);
+               xstats->tvm_rt_latency_max = 
RTE_MAX(xstats->tvm_rt_latency_max, tvm_rt_latency);
+               xstats->dequeued_count++;
+       } else {
+               qp->stats.dequeue_err_count++;
+               op->status = RTE_ML_OP_STATUS_ERROR;
+       }
+}
diff --git a/drivers/ml/cnxk/mvtvm_ml_ops.h b/drivers/ml/cnxk/mvtvm_ml_ops.h
index a1a868ef4b..82292ceadd 100644
--- a/drivers/ml/cnxk/mvtvm_ml_ops.h
+++ b/drivers/ml/cnxk/mvtvm_ml_ops.h
@@ -13,6 +13,44 @@
 
 struct cnxk_ml_dev;
 struct cnxk_ml_model;
+struct cnxk_ml_qp;
+struct cnxk_ml_req;
+
+/* Inference stats */
+struct mvtvm_ml_stats {
+       /* Start ns */
+       uint64_t start_ns;
+
+       /* End ns */
+       uint64_t end_ns;
+};
+
+/* Result structure */
+struct mvtvm_ml_result {
+       /* Job error code */
+       uint64_t error_code;
+
+       /* Inference stats */
+       struct mvtvm_ml_stats stats;
+
+       /* User context pointer */
+       void *user_ptr;
+};
+
+/* MVTVM specific request */
+struct mvtvm_ml_req {
+       /* Input tensors */
+       DLTensor input_tensor[ML_CNXK_MODEL_MAX_INPUT_OUTPUT];
+
+       /* Output tensors */
+       DLTensor output_tensor[ML_CNXK_MODEL_MAX_INPUT_OUTPUT];
+
+       /* Status field for poll mode requests */
+       volatile uint64_t status;
+
+       /* Result */
+       struct mvtvm_ml_result result;
+};
 
 int mvtvm_ml_dev_configure(struct cnxk_ml_dev *cnxk_mldev, const struct 
rte_ml_dev_config *conf);
 int mvtvm_ml_dev_close(struct cnxk_ml_dev *cnxk_mldev);
@@ -26,4 +64,9 @@ int mvtvm_ml_io_quantize(void *device, uint16_t model_id, const char *layer_name
 int mvtvm_ml_io_dequantize(void *device, uint16_t model_id, const char 
*layer_name, void *qbuffer,
                           const DLTensor **deq_tensor);
 
+__rte_hot bool mvtvm_ml_enqueue_single(struct cnxk_ml_dev *cnxk_mldev, struct 
rte_ml_op *op,
+                                      uint16_t layer_id, struct cnxk_ml_qp 
*qp, uint64_t head);
+__rte_hot void mvtvm_ml_result_update(struct cnxk_ml_dev *cnxk_mldev, int 
qp_id, void *request);
+__rte_hot void mvtvm_ml_set_error_code(struct cnxk_ml_req *req, uint64_t 
etype, uint64_t stype);
+
 #endif /* _MVTVM_ML_OPS_H_ */
-- 
2.41.0

[PATCH v2 33/34] ml/cnxk: enable fast-path ops for TVM models

Reply via email to