Add job descriptor (JD) structures for model load, unload and run jobs. Initialize the job command and allocate memory for the request structures used by slow-path jobs.
Signed-off-by: Srikanth Yalavarthi <syalavar...@marvell.com> --- drivers/ml/cnxk/cn10k_ml_dev.h | 99 ++++++++++++++++++++++++++++++++ drivers/ml/cnxk/cn10k_ml_model.h | 4 ++ drivers/ml/cnxk/cn10k_ml_ops.c | 19 +++++- drivers/ml/cnxk/cn10k_ml_ops.h | 4 ++ 4 files changed, 125 insertions(+), 1 deletion(-) diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h index 02a4496c97..68fcc957fa 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.h +++ b/drivers/ml/cnxk/cn10k_ml_dev.h @@ -188,6 +188,105 @@ struct cn10k_ml_jd { uint8_t rsvd[8]; } fw_load; + + struct cn10k_ml_jd_section_model_start { + /* Source model start address in DDR relative to ML_MLR_BASE */ + uint64_t model_src_ddr_addr; + + /* Destination model start address in DDR relative to ML_MLR_BASE */ + uint64_t model_dst_ddr_addr; + + /* Offset to model init section in the model */ + uint64_t model_init_offset : 32; + + /* Size of init section in the model */ + uint64_t model_init_size : 32; + + /* Offset to model main section in the model */ + uint64_t model_main_offset : 32; + + /* Size of main section in the model */ + uint64_t model_main_size : 32; + + /* Offset to model finish section in the model */ + uint64_t model_finish_offset : 32; + + /* Size of finish section in the model */ + uint64_t model_finish_size : 32; + + /* Offset to WB in model bin */ + uint64_t model_wb_offset : 32; + + /* Number of model layers */ + uint64_t num_layers : 8; + + /* Number of gather entries, 0 means linear input mode (= no gather) */ + uint64_t num_gather_entries : 8; + + /* Number of scatter entries, 0 means linear output mode (= no scatter) */ + uint64_t num_scatter_entries : 8; + + /* Tile mask to load model */ + uint64_t tilemask : 8; + + /* Batch size of model */ + uint64_t batch_size : 32; + + /* OCM WB base address */ + uint64_t ocm_wb_base_address : 32; + + /* OCM WB range start */ + uint64_t ocm_wb_range_start : 32; + + /* OCM WB range end */ + uint64_t ocm_wb_range_end : 32; + + /* DDR WB address */ + 
uint64_t ddr_wb_base_address; + + /* DDR WB range start */ + uint64_t ddr_wb_range_start : 32; + + /* DDR WB range end */ + uint64_t ddr_wb_range_end : 32; + + union { + /* Points to gather list if num_gather_entries > 0 */ + void *gather_list; + struct { + /* Linear input mode */ + uint64_t ddr_range_start : 32; + uint64_t ddr_range_end : 32; + } s; + } input; + + union { + /* Points to scatter list if num_scatter_entries > 0 */ + void *scatter_list; + struct { + /* Linear output mode */ + uint64_t ddr_range_start : 32; + uint64_t ddr_range_end : 32; + } s; + } output; + } model_start; + + struct cn10k_ml_jd_section_model_stop { + uint8_t rsvd[96]; + } model_stop; + + struct cn10k_ml_jd_section_model_run { + /* Address of the input for the run relative to ML_MLR_BASE */ + uint64_t input_ddr_addr; + + /* Address of the output for the run relative to ML_MLR_BASE */ + uint64_t output_ddr_addr; + + /* Number of batches to run in variable batch processing */ + uint16_t num_batches; + + uint8_t rsvd[78]; + } model_run; }; }; diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h index 7893635787..355915deeb 100644 --- a/drivers/ml/cnxk/cn10k_ml_model.h +++ b/drivers/ml/cnxk/cn10k_ml_model.h @@ -11,6 +11,7 @@ #include "cn10k_ml_dev.h" #include "cn10k_ml_ocm.h" +#include "cn10k_ml_ops.h" /* Model state */ enum cn10k_ml_model_state { @@ -426,6 +427,9 @@ struct cn10k_ml_model { /* State */ enum cn10k_ml_model_state state; + + /* Slow-path operations request pointer */ + struct cn10k_ml_req *req; }; int cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t size); diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index 302ce8a452..56adce12ea 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -12,6 +12,10 @@ /* ML model macros */ #define CN10K_ML_MODEL_MEMZONE_NAME "ml_cn10k_model_mz" +/* ML Job descriptor flags */ +#define ML_FLAGS_POLL_COMPL BIT(0) +#define ML_FLAGS_SSO_COMPL BIT(1) + 
static void qp_memzone_name_get(char *name, int size, int dev_id, int qp_id) { @@ -65,6 +69,7 @@ cn10k_ml_qp_create(const struct rte_ml_dev *dev, uint16_t qp_id, uint32_t nb_des struct cn10k_ml_qp *qp; uint32_t len; uint8_t *va; + uint64_t i; /* Allocate queue pair */ qp = rte_zmalloc_socket("cn10k_ml_pmd_queue_pair", sizeof(struct cn10k_ml_qp), ROC_ALIGN, @@ -95,6 +100,12 @@ cn10k_ml_qp_create(const struct rte_ml_dev *dev, uint16_t qp_id, uint32_t nb_des qp->queue.wait_cycles = ML_CN10K_CMD_TIMEOUT * plt_tsc_hz(); qp->nb_desc = nb_desc; + /* Initialize job command */ + for (i = 0; i < qp->nb_desc; i++) { + memset(&qp->queue.reqs[i].jd, 0, sizeof(struct cn10k_ml_jd)); + qp->queue.reqs[i].jcmd.w1.s.jobptr = PLT_U64_CAST(&qp->queue.reqs[i].jd); + } + return qp; qp_free: @@ -468,7 +479,8 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params, metadata->finish_model.file_size + metadata->weights_bias.file_size; model_data_size = PLT_ALIGN_CEIL(model_data_size, ML_CN10K_ALIGN_SIZE); mz_size = PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_model), ML_CN10K_ALIGN_SIZE) + - 2 * model_data_size; + 2 * model_data_size + + PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_req), ML_CN10K_ALIGN_SIZE); /* Allocate memzone for model object and model data */ snprintf(str, RTE_MEMZONE_NAMESIZE, "%s_%u", CN10K_ML_MODEL_MEMZONE_NAME, idx); @@ -507,6 +519,11 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params, model->model_mem_map.wb_pages = wb_pages; model->model_mem_map.scratch_pages = scratch_pages; + /* Set slow-path request address and state */ + model->req = PLT_PTR_ADD( + mz->addr, PLT_ALIGN_CEIL(sizeof(struct cn10k_ml_model), ML_CN10K_ALIGN_SIZE) + + 2 * model_data_size); + plt_spinlock_init(&model->lock); model->state = ML_CN10K_MODEL_STATE_LOADED; dev->data->models[idx] = model; diff --git a/drivers/ml/cnxk/cn10k_ml_ops.h b/drivers/ml/cnxk/cn10k_ml_ops.h index d7842ecd73..c86ce66f19 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.h +++ 
b/drivers/ml/cnxk/cn10k_ml_ops.h @@ -6,6 +6,7 @@ #define _CN10K_ML_OPS_H_ #include <rte_mldev.h> +#include <rte_mldev_pmd.h> #include <roc_api.h> @@ -21,6 +22,9 @@ struct cn10k_ml_req { /* Status field for poll mode requests */ volatile uint64_t status; + + /* Job command */ + struct ml_job_cmd_s jcmd; } __rte_aligned(ROC_ALIGN); /* Request queue */ -- 2.17.1