Renamed cnxk error codes to cn10k error codes. Added support for model-specific op_error_get routines.
Signed-off-by: Srikanth Yalavarthi <syalavar...@marvell.com> --- drivers/ml/cnxk/cn10k_ml_dev.c | 8 ++++++++ drivers/ml/cnxk/cn10k_ml_dev.h | 16 ++++++++++++++++ drivers/ml/cnxk/cn10k_ml_ops.c | 20 +++++++++++--------- drivers/ml/cnxk/cn10k_ml_ops.h | 2 +- drivers/ml/cnxk/cnxk_ml_dev.c | 8 -------- drivers/ml/cnxk/cnxk_ml_dev.h | 18 +----------------- drivers/ml/cnxk/cnxk_ml_model.h | 3 +++ drivers/ml/cnxk/cnxk_ml_ops.c | 18 ++++++++++++++---- drivers/ml/cnxk/cnxk_ml_ops.h | 2 ++ drivers/ml/cnxk/mvtvm_ml_ops.c | 13 +++++++++++++ drivers/ml/cnxk/mvtvm_ml_ops.h | 2 ++ 11 files changed, 71 insertions(+), 39 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_dev.c b/drivers/ml/cnxk/cn10k_ml_dev.c index 41f3b7a95da..2e719919ce1 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.c +++ b/drivers/ml/cnxk/cn10k_ml_dev.c @@ -58,6 +58,14 @@ static const char *const valid_args[] = {CN10K_ML_FW_PATH, /* Supported OCM page sizes: 1KB, 2KB, 4KB, 8KB and 16KB */ static const int valid_ocm_page_size[] = {1024, 2048, 4096, 8192, 16384}; +/* Error type database */ +struct cn10k_ml_error_db ml_etype_db[] = { + {ML_CN10K_ETYPE_NO_ERROR, "NO_ERROR"}, {ML_CN10K_ETYPE_FW_NONFATAL, "FW_NON_FATAL"}, + {ML_CN10K_ETYPE_HW_NONFATAL, "HW_NON_FATAL"}, {ML_CN10K_ETYPE_HW_FATAL, "HW_FATAL"}, + {ML_CN10K_ETYPE_HW_WARNING, "HW_WARNING"}, {ML_CN10K_ETYPE_DRIVER, "DRIVER_ERROR"}, + {ML_CN10K_ETYPE_UNKNOWN, "UNKNOWN_ERROR"}, +}; + static int parse_string_arg(const char *key __rte_unused, const char *value, void *extra_args) { diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h index ddb8b67e06e..dadb3b571ba 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.h +++ b/drivers/ml/cnxk/cn10k_ml_dev.h @@ -46,6 +46,22 @@ struct cnxk_ml_dev; struct cnxk_ml_req; struct cnxk_ml_qp; +/* Error types enumeration */ +enum cn10k_ml_error_etype { + /* 0x0 */ ML_CN10K_ETYPE_NO_ERROR = 0, /* No error */ + /* 0x1 */ ML_CN10K_ETYPE_FW_NONFATAL, /* Firmware non-fatal error */ + /* 0x2 */ ML_CN10K_ETYPE_HW_NONFATAL, 
/* Hardware non-fatal error */ + /* 0x3 */ ML_CN10K_ETYPE_HW_FATAL, /* Hardware fatal error */ + /* 0x4 */ ML_CN10K_ETYPE_HW_WARNING, /* Hardware warning */ + /* 0x5 */ ML_CN10K_ETYPE_DRIVER, /* Driver specific error */ + /* 0x6 */ ML_CN10K_ETYPE_UNKNOWN, /* Unknown error */ +}; + +struct cn10k_ml_error_db { + uint64_t code; + char str[RTE_ML_STR_MAX]; +}; + /* Firmware non-fatal error sub-type */ enum cn10k_ml_error_stype_fw_nf { /* 0x0 */ ML_CN10K_FW_ERR_NOERR = 0, /* No error */ diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index 834e55e88e9..b30af7c7a44 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -23,7 +23,7 @@ #define ML_FLAGS_SSO_COMPL BIT(1) /* Hardware non-fatal error subtype database */ -static struct cnxk_ml_error_db ml_stype_db_hw_nf[] = { +static struct cn10k_ml_error_db ml_stype_db_hw_nf[] = { {ML_CN10K_FW_ERR_NOERR, "NO ERROR"}, {ML_CN10K_FW_ERR_UNLOAD_ID_NOT_FOUND, "UNLOAD MODEL ID NOT FOUND"}, {ML_CN10K_FW_ERR_LOAD_LUT_OVERFLOW, "LOAD LUT OVERFLOW"}, @@ -38,7 +38,7 @@ static struct cnxk_ml_error_db ml_stype_db_hw_nf[] = { }; /* Driver error subtype database */ -static struct cnxk_ml_error_db ml_stype_db_driver[] = { +static struct cn10k_ml_error_db ml_stype_db_driver[] = { {ML_CN10K_DRIVER_ERR_NOERR, "NO ERROR"}, {ML_CN10K_DRIVER_ERR_UNKNOWN, "UNKNOWN ERROR"}, {ML_CN10K_DRIVER_ERR_EXCEPTION, "FW EXCEPTION"}, @@ -784,6 +784,7 @@ cn10k_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params * model->result_update = cn10k_ml_result_update; model->set_error_code = cn10k_ml_set_error_code; model->set_poll_addr = cn10k_ml_set_poll_addr; + model->op_error_get = cn10k_ml_op_error_get; return 0; } @@ -1257,7 +1258,7 @@ cn10k_ml_result_update(struct cnxk_ml_dev *cnxk_mldev, int qp_id, void *request) /* Handle driver error */ error_code = (union cn10k_ml_error_code *)&result->error_code; - if (error_code->s.etype == ML_CNXK_ETYPE_DRIVER) { + if (error_code->s.etype == 
ML_CN10K_ETYPE_DRIVER) { cn10k_mldev = &cnxk_mldev->cn10k_mldev; /* Check for exception */ @@ -1310,7 +1311,7 @@ cn10k_ml_enqueue_single(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op, ui memset(&req->cn10k_req.result, 0, sizeof(struct cn10k_ml_result)); error_code = (union cn10k_ml_error_code *)&req->cn10k_req.result.error_code; - error_code->s.etype = ML_CNXK_ETYPE_UNKNOWN; + error_code->s.etype = ML_CN10K_ETYPE_UNKNOWN; req->cn10k_req.result.user_ptr = op->user_ptr; cnxk_ml_set_poll_ptr(req); @@ -1324,16 +1325,17 @@ cn10k_ml_enqueue_single(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op, ui } __rte_hot int -cn10k_ml_op_error_get(struct rte_ml_dev *dev, struct rte_ml_op *op, struct rte_ml_op_error *error) +cn10k_ml_op_error_get(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op, + struct rte_ml_op_error *error) { union cn10k_ml_error_code *error_code; - PLT_SET_USED(dev); + PLT_SET_USED(cnxk_mldev); error_code = (union cn10k_ml_error_code *)&op->impl_opaque; /* Copy sub error message */ - if (error_code->s.etype == ML_CNXK_ETYPE_HW_NONFATAL) { + if (error_code->s.etype == ML_CN10K_ETYPE_HW_NONFATAL) { if (error_code->s.stype < PLT_DIM(ml_stype_db_hw_nf)) snprintf(error->message, RTE_ML_STR_MAX, "%s : %s", ml_etype_db[error_code->s.etype].str, @@ -1341,7 +1343,7 @@ cn10k_ml_op_error_get(struct rte_ml_dev *dev, struct rte_ml_op *op, struct rte_m else snprintf(error->message, RTE_ML_STR_MAX, "%s : UNKNOWN ERROR", ml_etype_db[error_code->s.etype].str); - } else if (error_code->s.etype == ML_CNXK_ETYPE_DRIVER) { + } else if (error_code->s.etype == ML_CN10K_ETYPE_DRIVER) { snprintf(error->message, RTE_ML_STR_MAX, "%s : %s", ml_etype_db[error_code->s.etype].str, ml_stype_db_driver[error_code->s.stype].str); @@ -1387,7 +1389,7 @@ cn10k_ml_inference_sync(void *device, uint16_t index, void *input, void *output, memset(&req->cn10k_req.result, 0, sizeof(struct cn10k_ml_result)); error_code = (union cn10k_ml_error_code *)&req->cn10k_req.result.error_code; - 
error_code->s.etype = ML_CNXK_ETYPE_UNKNOWN; + error_code->s.etype = ML_CN10K_ETYPE_UNKNOWN; req->cn10k_req.result.user_ptr = NULL; cnxk_ml_set_poll_ptr(req); diff --git a/drivers/ml/cnxk/cn10k_ml_ops.h b/drivers/ml/cnxk/cn10k_ml_ops.h index eb3e1c139c7..0f352282014 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.h +++ b/drivers/ml/cnxk/cn10k_ml_ops.h @@ -312,7 +312,7 @@ int cn10k_ml_model_params_update(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_ /* Fast-path ops */ __rte_hot bool cn10k_ml_enqueue_single(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op, uint16_t layer_id, struct cnxk_ml_qp *qp, uint64_t head); -__rte_hot int cn10k_ml_op_error_get(struct rte_ml_dev *dev, struct rte_ml_op *op, +__rte_hot int cn10k_ml_op_error_get(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op, struct rte_ml_op_error *error); __rte_hot int cn10k_ml_inference_sync(void *device, uint16_t index, void *input, void *output, uint16_t nb_batches); diff --git a/drivers/ml/cnxk/cnxk_ml_dev.c b/drivers/ml/cnxk/cnxk_ml_dev.c index dc4512223ca..567f8ea7542 100644 --- a/drivers/ml/cnxk/cnxk_ml_dev.c +++ b/drivers/ml/cnxk/cnxk_ml_dev.c @@ -12,11 +12,3 @@ int cnxk_ml_dev_initialized; /* Dummy operations for ML device */ struct rte_ml_dev_ops ml_dev_dummy_ops = {0}; - -/* Error type database */ -struct cnxk_ml_error_db ml_etype_db[] = { - {ML_CNXK_ETYPE_NO_ERROR, "NO_ERROR"}, {ML_CNXK_ETYPE_FW_NONFATAL, "FW_NON_FATAL"}, - {ML_CNXK_ETYPE_HW_NONFATAL, "HW_NON_FATAL"}, {ML_CNXK_ETYPE_HW_FATAL, "HW_FATAL"}, - {ML_CNXK_ETYPE_HW_WARNING, "HW_WARNING"}, {ML_CNXK_ETYPE_DRIVER, "DRIVER_ERROR"}, - {ML_CNXK_ETYPE_UNKNOWN, "UNKNOWN_ERROR"}, -}; diff --git a/drivers/ml/cnxk/cnxk_ml_dev.h b/drivers/ml/cnxk/cnxk_ml_dev.h index 491c4c4aea5..9e373e65715 100644 --- a/drivers/ml/cnxk/cnxk_ml_dev.h +++ b/drivers/ml/cnxk/cnxk_ml_dev.h @@ -22,22 +22,6 @@ #define ML_CNXK_POLL_JOB_START 0 #define ML_CNXK_POLL_JOB_FINISH 1 -/* Error types enumeration */ -enum cnxk_ml_error_etype { - /* 0x0 */ ML_CNXK_ETYPE_NO_ERROR = 
0, /* No error */ - /* 0x1 */ ML_CNXK_ETYPE_FW_NONFATAL, /* Firmware non-fatal error */ - /* 0x2 */ ML_CNXK_ETYPE_HW_NONFATAL, /* Hardware non-fatal error */ - /* 0x3 */ ML_CNXK_ETYPE_HW_FATAL, /* Hardware fatal error */ - /* 0x4 */ ML_CNXK_ETYPE_HW_WARNING, /* Hardware warning */ - /* 0x5 */ ML_CNXK_ETYPE_DRIVER, /* Driver specific error */ - /* 0x6 */ ML_CNXK_ETYPE_UNKNOWN, /* Unknown error */ -}; - -struct cnxk_ml_error_db { - uint64_t code; - char str[RTE_ML_STR_MAX]; -}; - /* Device type */ enum cnxk_ml_dev_type { /* PCI based Marvell's ML HW accelerator device */ @@ -115,6 +99,6 @@ struct cnxk_ml_dev { struct cnxk_ml_index_map *index_map; }; -extern struct cnxk_ml_error_db ml_etype_db[]; +extern struct cn10k_ml_error_db ml_etype_db[]; #endif /* _CNXK_ML_DEV_H_ */ diff --git a/drivers/ml/cnxk/cnxk_ml_model.h b/drivers/ml/cnxk/cnxk_ml_model.h index a2fced46a22..1cd5ca1906a 100644 --- a/drivers/ml/cnxk/cnxk_ml_model.h +++ b/drivers/ml/cnxk/cnxk_ml_model.h @@ -128,6 +128,8 @@ typedef bool (*enqueue_single_t)(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_o typedef void (*result_update_t)(struct cnxk_ml_dev *cnxk_mldev, int qp_id, void *request); typedef void (*set_error_code_t)(struct cnxk_ml_req *req, uint64_t etype, uint64_t stype); typedef void (*set_poll_addr_t)(struct cnxk_ml_req *req); +typedef int (*op_error_get_t)(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op, + struct rte_ml_op_error *error); /* Model Object */ struct cnxk_ml_model { @@ -184,6 +186,7 @@ struct cnxk_ml_model { result_update_t result_update; set_error_code_t set_error_code; set_poll_addr_t set_poll_addr; + op_error_get_t op_error_get; }; enum cnxk_ml_model_type cnxk_ml_model_get_type(struct rte_ml_model_params *params); diff --git a/drivers/ml/cnxk/cnxk_ml_ops.c b/drivers/ml/cnxk/cnxk_ml_ops.c index 971362b2420..6e0160f2656 100644 --- a/drivers/ml/cnxk/cnxk_ml_ops.c +++ b/drivers/ml/cnxk/cnxk_ml_ops.c @@ -647,9 +647,7 @@ cnxk_ml_dev_configure(struct rte_ml_dev *dev, const struct 
rte_ml_dev_config *co cnxk_mldev->mldev->enqueue_burst = cnxk_ml_enqueue_burst; cnxk_mldev->mldev->dequeue_burst = cnxk_ml_dequeue_burst; - - if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_PCI) - cnxk_mldev->mldev->op_error_get = cn10k_ml_op_error_get; + cnxk_mldev->mldev->op_error_get = cnxk_ml_op_error_get; /* Allocate and initialize index_map */ if (cnxk_mldev->index_map == NULL) { @@ -1636,7 +1634,7 @@ cnxk_ml_dequeue_burst(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op * if (plt_tsc_cycles() < req->timeout) goto empty_or_active; else /* Timeout, set indication of driver error */ - model->set_error_code(req, ML_CNXK_ETYPE_DRIVER, 0); + model->set_error_code(req, ML_CN10K_ETYPE_DRIVER, 0); } model->result_update(cnxk_mldev, qp->id, req); @@ -1654,6 +1652,18 @@ cnxk_ml_dequeue_burst(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op * return count; } +__rte_hot int +cnxk_ml_op_error_get(struct rte_ml_dev *dev, struct rte_ml_op *op, struct rte_ml_op_error *error) +{ + struct cnxk_ml_dev *cnxk_mldev; + struct cnxk_ml_model *model; + + cnxk_mldev = dev->data->dev_private; + model = cnxk_mldev->mldev->data->models[op->model_id]; + + return model->op_error_get(cnxk_mldev, op, error); +} + struct rte_ml_dev_ops cnxk_ml_ops = { /* Device control ops */ .dev_info_get = cnxk_ml_dev_info_get, diff --git a/drivers/ml/cnxk/cnxk_ml_ops.h b/drivers/ml/cnxk/cnxk_ml_ops.h index e348cc4e857..7a79fec412e 100644 --- a/drivers/ml/cnxk/cnxk_ml_ops.h +++ b/drivers/ml/cnxk/cnxk_ml_ops.h @@ -83,5 +83,7 @@ __rte_hot uint16_t cnxk_ml_dequeue_burst(struct rte_ml_dev *dev, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops); __rte_hot void cnxk_ml_set_poll_ptr(struct cnxk_ml_req *req); __rte_hot uint64_t cnxk_ml_get_poll_ptr(struct cnxk_ml_req *req); +__rte_hot int cnxk_ml_op_error_get(struct rte_ml_dev *dev, struct rte_ml_op *op, + struct rte_ml_op_error *error); #endif /* _CNXK_ML_OPS_H_ */ diff --git a/drivers/ml/cnxk/mvtvm_ml_ops.c b/drivers/ml/cnxk/mvtvm_ml_ops.c index 
e825c3fb23e..4c1cda3005b 100644 --- a/drivers/ml/cnxk/mvtvm_ml_ops.c +++ b/drivers/ml/cnxk/mvtvm_ml_ops.c @@ -329,11 +329,13 @@ mvtvm_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params * model->result_update = cn10k_ml_result_update; model->set_error_code = cn10k_ml_set_error_code; model->set_poll_addr = cn10k_ml_set_poll_addr; + model->op_error_get = cn10k_ml_op_error_get; } else { model->enqueue_single = mvtvm_ml_enqueue_single; model->result_update = mvtvm_ml_result_update; model->set_error_code = mvtvm_ml_set_error_code; model->set_poll_addr = mvtvm_ml_set_poll_addr; + model->op_error_get = mvtvm_ml_op_error_get; } return 0; @@ -584,6 +586,17 @@ mvtvm_ml_set_error_code(struct cnxk_ml_req *req, uint64_t etype, uint64_t stype) req->mvtvm_req.result.error_code = etype; } +__rte_hot int +mvtvm_ml_op_error_get(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op, + struct rte_ml_op_error *error) +{ + RTE_SET_USED(cnxk_mldev); + RTE_SET_USED(op); + RTE_SET_USED(error); + + return 0; +} + __rte_hot bool mvtvm_ml_enqueue_single(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op, uint16_t layer_id, struct cnxk_ml_qp *qp, uint64_t head) diff --git a/drivers/ml/cnxk/mvtvm_ml_ops.h b/drivers/ml/cnxk/mvtvm_ml_ops.h index 0232c5ead5d..d8f2f361fb1 100644 --- a/drivers/ml/cnxk/mvtvm_ml_ops.h +++ b/drivers/ml/cnxk/mvtvm_ml_ops.h @@ -71,6 +71,8 @@ int mvtvm_ml_io_dequantize(void *device, uint16_t model_id, const char *layer_na __rte_hot bool mvtvm_ml_enqueue_single(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op, uint16_t layer_id, struct cnxk_ml_qp *qp, uint64_t head); +__rte_hot int mvtvm_ml_op_error_get(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_op *op, + struct rte_ml_op_error *error); __rte_hot void mvtvm_ml_result_update(struct cnxk_ml_dev *cnxk_mldev, int qp_id, void *request); __rte_hot void mvtvm_ml_set_error_code(struct cnxk_ml_req *req, uint64_t etype, uint64_t stype); -- 2.45.1