cnxk: add structures for slow and fast path JDs

Srikanth Yalavarthi Tue, 07 Feb 2023 08:09:16 -0800

Add job descriptor (JD) structures for load, unload and run jobs.
Initialize the job command of every request in a queue pair, and
reserve memory in the model memzone for the slow-path request structure.


Signed-off-by: Srikanth Yalavarthi <syalavar...@marvell.com>
---
 drivers/ml/cnxk/cn10k_ml_dev.h   | 99 ++++++++++++++++++++++++++++++++
 drivers/ml/cnxk/cn10k_ml_model.h |  4 ++
 drivers/ml/cnxk/cn10k_ml_ops.c   | 19 +++++-
 drivers/ml/cnxk/cn10k_ml_ops.h   |  4 ++
 4 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h
index 02a4496c97..68fcc957fa 100644
--- a/drivers/ml/cnxk/cn10k_ml_dev.h
+++ b/drivers/ml/cnxk/cn10k_ml_dev.h
@@ -188,6 +188,105 @@ struct cn10k_ml_jd {
 
                        uint8_t rsvd[8];
                } fw_load;
+
+               struct cn10k_ml_jd_section_model_start {
+                       /* Source model start address in DDR relative to ML_MLR_BASE */
+                       uint64_t model_src_ddr_addr;
+
+                       /* Destination model start address in DDR relative to ML_MLR_BASE */
+                       uint64_t model_dst_ddr_addr;
+
+                       /* Offset to model init section in the model */
+                       uint64_t model_init_offset : 32;
+
+                       /* Size of init section in the model */
+                       uint64_t model_init_size : 32;
+
+                       /* Offset to model main section in the model */
+                       uint64_t model_main_offset : 32;
+
+                       /* Size of main section in the model */
+                       uint64_t model_main_size : 32;
+
+                       /* Offset to model finish section in the model */
+                       uint64_t model_finish_offset : 32;
+
+                       /* Size of finish section in the model */
+                       uint64_t model_finish_size : 32;
+
+                       /* Offset to WB in model bin */
+                       uint64_t model_wb_offset : 32;
+
+                       /* Number of model layers */
+                       uint64_t num_layers : 8;
+
+                       /* Number of gather entries, 0 means linear input mode (= no gather) */
+                       uint64_t num_gather_entries : 8;
+
+                       /* Number of scatter entries, 0 means linear output mode (= no scatter) */
+                       uint64_t num_scatter_entries : 8;
+
+                       /* Tile mask to load model */
+                       uint64_t tilemask : 8;
+
+                       /* Batch size of model  */
+                       uint64_t batch_size : 32;
+
+                       /* OCM WB base address */
+                       uint64_t ocm_wb_base_address : 32;
+
+                       /* OCM WB range start */
+                       uint64_t ocm_wb_range_start : 32;
+
+                       /* OCM WB range End */
+                       uint64_t ocm_wb_range_end : 32;
+
+                       /* DDR WB address */
+                       uint64_t ddr_wb_base_address;
+
+                       /* DDR WB range start */
+                       uint64_t ddr_wb_range_start : 32;
+
+                       /* DDR WB range end */
+                       uint64_t ddr_wb_range_end : 32;
+
+                       union {
+                               /* Points to gather list if num_gather_entries > 0 */
+                               void *gather_list;
+                               struct {
+                                       /* Linear input mode */
+                                       uint64_t ddr_range_start : 32;
+                                       uint64_t ddr_range_end : 32;
+                               } s;
+                       } input;
+
+                       union {
+                               /* Points to scatter list if num_scatter_entries > 0 */
+                               void *scatter_list;
+                               struct {
+                                       /* Linear output mode */
+                                       uint64_t ddr_range_start : 32;
+                                       uint64_t ddr_range_end : 32;
+                               } s;
+                       } output;
+               } model_start;
+
+               struct cn10k_ml_jd_section_model_stop {
+                       uint8_t rsvd[96];
+               } model_stop;
+
+               struct cn10k_ml_jd_section_model_run {
+                       /* Address of the input for the run relative to ML_MLR_BASE */
+                       uint64_t input_ddr_addr;
+
+                       /* Address of the output for the run relative to ML_MLR_BASE */
+                       uint64_t output_ddr_addr;
+
+                       /* Number of batches to run in variable batch processing */
+                       uint16_t num_batches;
+
+                       uint8_t rsvd[78];
+               } model_run;
        };
 };
 
diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h
index 7893635787..355915deeb 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.h
+++ b/drivers/ml/cnxk/cn10k_ml_model.h
@@ -11,6 +11,7 @@
 
 #include "cn10k_ml_dev.h"
 #include "cn10k_ml_ocm.h"
+#include "cn10k_ml_ops.h"
 
 /* Model state */
 enum cn10k_ml_model_state {
@@ -426,6 +427,9 @@ struct cn10k_ml_model {
 
        /* State */
        enum cn10k_ml_model_state state;
+
+       /* Slow-path operations request pointer */
+       struct cn10k_ml_req *req;
 };
 
 int cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size);
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index 302ce8a452..56adce12ea 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -12,6 +12,10 @@
 /* ML model macros */
 #define CN10K_ML_MODEL_MEMZONE_NAME "ml_cn10k_model_mz"
 
+/* ML Job descriptor flags */
+#define ML_FLAGS_POLL_COMPL BIT(0)
+#define ML_FLAGS_SSO_COMPL  BIT(1)
+
 static void
 qp_memzone_name_get(char *name, int size, int dev_id, int qp_id)
 {
@@ -65,6 +69,7 @@ cn10k_ml_qp_create(const struct rte_ml_dev *dev, uint16_t 
qp_id, uint32_t nb_des
        struct cn10k_ml_qp *qp;
        uint32_t len;
        uint8_t *va;
+       uint64_t i;
 
        /* Allocate queue pair */
        qp = rte_zmalloc_socket("cn10k_ml_pmd_queue_pair", sizeof(struct 
cn10k_ml_qp), ROC_ALIGN,
@@ -95,6 +100,12 @@ cn10k_ml_qp_create(const struct rte_ml_dev *dev, uint16_t 
qp_id, uint32_t nb_des
        qp->queue.wait_cycles = ML_CN10K_CMD_TIMEOUT * plt_tsc_hz();
        qp->nb_desc = nb_desc;
 
+       /* Initialize job command */
+       for (i = 0; i < qp->nb_desc; i++) {
+               memset(&qp->queue.reqs[i].jd, 0, sizeof(struct cn10k_ml_jd));
+               qp->queue.reqs[i].jcmd.w1.s.jobptr = 
PLT_U64_CAST(&qp->queue.reqs[i].jd);
+       }
+
        return qp;
 
 qp_free:
@@ -468,7 +479,8 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct 
rte_ml_model_params *params,
                          metadata->finish_model.file_size + 
metadata->weights_bias.file_size;
        model_data_size = PLT_ALIGN_CEIL(model_data_size, ML_CN10K_ALIGN_SIZE);
        mz_size = PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_model), 
ML_CN10K_ALIGN_SIZE) +
-                 2 * model_data_size;
+                 2 * model_data_size +
+                 PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_req), 
ML_CN10K_ALIGN_SIZE);
 
        /* Allocate memzone for model object and model data */
        snprintf(str, RTE_MEMZONE_NAMESIZE, "%s_%u", 
CN10K_ML_MODEL_MEMZONE_NAME, idx);
@@ -507,6 +519,11 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct 
rte_ml_model_params *params,
        model->model_mem_map.wb_pages = wb_pages;
        model->model_mem_map.scratch_pages = scratch_pages;
 
+       /* Set slow-path request address and state */
+       model->req = PLT_PTR_ADD(
+               mz->addr, PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_model), 
ML_CN10K_ALIGN_SIZE) +
+                                 2 * model_data_size);
+
        plt_spinlock_init(&model->lock);
        model->state = ML_CN10K_MODEL_STATE_LOADED;
        dev->data->models[idx] = model;
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.h b/drivers/ml/cnxk/cn10k_ml_ops.h
index d7842ecd73..c86ce66f19 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.h
+++ b/drivers/ml/cnxk/cn10k_ml_ops.h
@@ -6,6 +6,7 @@
 #define _CN10K_ML_OPS_H_
 
 #include <rte_mldev.h>
+#include <rte_mldev_pmd.h>
 
 #include <roc_api.h>
 
@@ -21,6 +22,9 @@ struct cn10k_ml_req {
 
        /* Status field for poll mode requests */
        volatile uint64_t status;
+
+       /* Job command */
+       struct ml_job_cmd_s jcmd;
 } __rte_aligned(ROC_ALIGN);
 
 /* Request queue */
-- 
2.17.1

[PATCH v5 15/39] ml/cnxk: add structures for slow and fast path JDs

Reply via email to