Add internal structures to hold tile and OCM information and the
OCM-to-model memory mapping. Initialize the fields to platform-specific
defaults and compute the OCM / tile requirements for a model.

Signed-off-by: Srikanth Yalavarthi <syalavar...@marvell.com>
---
 drivers/ml/cnxk/cn10k_ml_dev.h   |  5 ++
 drivers/ml/cnxk/cn10k_ml_model.c | 53 +++++++++++++++++++++
 drivers/ml/cnxk/cn10k_ml_model.h |  6 +++
 drivers/ml/cnxk/cn10k_ml_ocm.c   |  5 ++
 drivers/ml/cnxk/cn10k_ml_ocm.h   | 79 ++++++++++++++++++++++++++++++++
 drivers/ml/cnxk/cn10k_ml_ops.c   | 31 ++++++++++++-
 drivers/ml/cnxk/meson.build      |  2 +
 7 files changed, 180 insertions(+), 1 deletion(-)
 create mode 100644 drivers/ml/cnxk/cn10k_ml_ocm.c
 create mode 100644 drivers/ml/cnxk/cn10k_ml_ocm.h

diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h
index 7cf6268115..02a4496c97 100644
--- a/drivers/ml/cnxk/cn10k_ml_dev.h
+++ b/drivers/ml/cnxk/cn10k_ml_dev.h
@@ -7,6 +7,8 @@
 
 #include <roc_api.h>
 
+#include "cn10k_ml_ocm.h"
+
 /* Marvell OCTEON CN10K ML PMD device name */
 #define MLDEV_NAME_CN10K_PMD ml_cn10k
 
@@ -215,6 +217,9 @@ struct cn10k_ml_dev {
        /* Firmware */
        struct cn10k_ml_fw fw;
 
+       /* OCM info */
+       struct cn10k_ml_ocm ocm;
+
        /* Number of models loaded */
        uint16_t nb_models_loaded;
 };
diff --git a/drivers/ml/cnxk/cn10k_ml_model.c b/drivers/ml/cnxk/cn10k_ml_model.c
index 2530beb80e..69d6306104 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.c
+++ b/drivers/ml/cnxk/cn10k_ml_model.c
@@ -8,6 +8,7 @@
 
 #include "cn10k_ml_dev.h"
 #include "cn10k_ml_model.h"
+#include "cn10k_ml_ocm.h"
 
 static enum rte_ml_io_type
 cn10k_ml_io_type_map(uint8_t type)
@@ -303,3 +304,55 @@ cn10k_ml_model_addr_update(struct cn10k_ml_model *model, 
uint8_t *buffer, uint8_
                           addr->output[i].sz_d, addr->output[i].sz_q);
        }
 }
+
+int
+cn10k_ml_model_ocm_pages_count(struct cn10k_ml_dev *mldev, uint16_t model_id, 
uint8_t *buffer,
+                              uint16_t *wb_pages, uint16_t *scratch_pages)
+{
+       struct cn10k_ml_model_metadata *metadata;
+       struct cn10k_ml_ocm *ocm;
+       uint64_t scratch_size;
+       uint64_t wb_size;
+
+       metadata = (struct cn10k_ml_model_metadata *)buffer;
+       ocm = &mldev->ocm;
+
+       /* WB size is the inclusive WB range length; taken as zero for non-relocatable models */
+       if (metadata->model.ocm_relocatable)
+               wb_size = metadata->model.ocm_wb_range_end - 
metadata->model.ocm_wb_range_start + 1;
+       else
+               wb_size = 0;
+
+       if (wb_size % ocm->page_size)
+               *wb_pages = wb_size / ocm->page_size + 1;
+       else
+               *wb_pages = wb_size / ocm->page_size;
+       plt_ml_dbg("model_id = %u, wb_size = %" PRIu64 ", wb_pages = %u", 
model_id, wb_size,
+                  *wb_pages);
+
+       scratch_size = ocm->size_per_tile - metadata->model.ocm_tmp_range_floor;
+       if (metadata->model.ocm_tmp_range_floor % ocm->page_size)
+               *scratch_pages = scratch_size / ocm->page_size + 1;
+       else
+               *scratch_pages = scratch_size / ocm->page_size;
+       plt_ml_dbg("model_id = %u, scratch_size = %" PRIu64 ", scratch_pages = 
%u", model_id,
+                  scratch_size, *scratch_pages);
+
+       /* Fail if WB and scratch pages together exceed the pages available on one tile */
+       if ((*wb_pages + *scratch_pages) > ML_CN10K_OCM_NUMPAGES) {
+               plt_err("Cannot create the model, OCM relocatable = %u",
+                       metadata->model.ocm_relocatable);
+               plt_err("wb_pages (%u) + scratch_pages (%u) > %u", *wb_pages, 
*scratch_pages,
+                       ML_CN10K_OCM_NUMPAGES);
+               return -ENOMEM;
+       }
+
+       /* For an OCM non-relocatable model, raise scratch_pages to at least the full tile so
+        * that the remaining space on the tile is not allocated to other models.
+        */
+       if (!metadata->model.ocm_relocatable)
+               *scratch_pages =
+                       PLT_MAX(PLT_U64_CAST(*scratch_pages), 
PLT_U64_CAST(ML_CN10K_OCM_NUMPAGES));
+
+       return 0;
+}
diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h
index 5345160a74..7893635787 100644
--- a/drivers/ml/cnxk/cn10k_ml_model.h
+++ b/drivers/ml/cnxk/cn10k_ml_model.h
@@ -10,6 +10,7 @@
 #include <roc_api.h>
 
 #include "cn10k_ml_dev.h"
+#include "cn10k_ml_ocm.h"
 
 /* Model state */
 enum cn10k_ml_model_state {
@@ -417,6 +418,9 @@ struct cn10k_ml_model {
        /* Address structure */
        struct cn10k_ml_model_addr addr;
 
+       /* Tile and memory information object */
+       struct cn10k_ml_ocm_model_map model_mem_map;
+
        /* Spinlock, used to update model state */
        plt_spinlock_t lock;
 
@@ -428,5 +432,7 @@ int cn10k_ml_model_metadata_check(uint8_t *buffer, uint64_t 
size);
 void cn10k_ml_model_metadata_update(struct cn10k_ml_model_metadata *metadata);
 void cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer,
                                uint8_t *base_dma_addr);
+int cn10k_ml_model_ocm_pages_count(struct cn10k_ml_dev *mldev, uint16_t 
model_id, uint8_t *buffer,
+                                  uint16_t *wb_pages, uint16_t *scratch_pages);
 
 #endif /* _CN10K_ML_MODEL_H_ */
diff --git a/drivers/ml/cnxk/cn10k_ml_ocm.c b/drivers/ml/cnxk/cn10k_ml_ocm.c
new file mode 100644
index 0000000000..b1c62f2963
--- /dev/null
+++ b/drivers/ml/cnxk/cn10k_ml_ocm.c
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2022 Marvell.
+ */
+
+#include "cn10k_ml_ocm.h"
diff --git a/drivers/ml/cnxk/cn10k_ml_ocm.h b/drivers/ml/cnxk/cn10k_ml_ocm.h
new file mode 100644
index 0000000000..44390396f9
--- /dev/null
+++ b/drivers/ml/cnxk/cn10k_ml_ocm.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2022 Marvell.
+ */
+
+#ifndef _CN10K_ML_OCM_H_
+#define _CN10K_ML_OCM_H_
+
+#include <rte_mldev.h>
+
+/* OCM page size in bytes (16 KiB). */
+#define ML_CN10K_OCM_PAGESIZE 0x4000
+
+/* Number of OCM tiles. */
+#define ML_CN10K_OCM_NUMTILES 0x8
+
+/* OCM size in bytes, per tile (1 MiB). */
+#define ML_CN10K_OCM_TILESIZE 0x100000
+
+/* Number of OCM pages, per tile. */
+#define ML_CN10K_OCM_NUMPAGES (ML_CN10K_OCM_TILESIZE / ML_CN10K_OCM_PAGESIZE)
+
+/* Number of OCM mask words, per tile; each word is 8 bits wide. */
+#define ML_CN10K_OCM_MASKWORDS (ML_CN10K_OCM_NUMPAGES / 8)
+
+/* Per-tile OCM usage information */
+struct cn10k_ml_ocm_tile_info {
+       /* Mask of used / allotted pages on the tile's OCM */
+       uint8_t ocm_mask[ML_CN10K_OCM_MASKWORDS];
+
+       /* Last page in the tile's OCM used for weights and bias, default = -1 */
+       int last_wb_page;
+
+       /* Number of pages used for scratch memory on the tile's OCM */
+       uint16_t scratch_pages;
+};
+
+/* Per-model OCM map: tile and page requirements of a model */
+struct cn10k_ml_ocm_model_map {
+       /* Status of the OCM reservation for the model */
+       bool ocm_reserved;
+
+       /* Mask of OCM tiles for the model */
+       uint64_t tilemask;
+
+       /* Start page for the model load, default = -1 */
+       int wb_page_start;
+
+       /* Number of pages required for weights and bias */
+       uint16_t wb_pages;
+
+       /* Number of pages required for scratch memory */
+       uint16_t scratch_pages;
+};
+
+/* Device-level OCM state structure */
+struct cn10k_ml_ocm {
+       /* OCM spinlock, used to update OCM state */
+       rte_spinlock_t lock;
+
+       /* Number of OCM tiles */
+       uint8_t num_tiles;
+
+       /* OCM size per tile, in bytes */
+       uint64_t size_per_tile;
+
+       /* Size of an OCM page, in bytes */
+       uint64_t page_size;
+
+       /* Number of OCM pages per tile */
+       uint16_t num_pages;
+
+       /* Number of 8-bit words in a tile's OCM page mask */
+       uint16_t mask_words;
+
+       /* Per-tile OCM memory info and status */
+       struct cn10k_ml_ocm_tile_info tile_ocm_info[ML_CN10K_OCM_NUMTILES];
+};
+
+#endif /* _CN10K_ML_OCM_H_ */
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index b11228f2cb..302ce8a452 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -126,9 +126,11 @@ cn10k_ml_dev_configure(struct rte_ml_dev *dev, const 
struct rte_ml_dev_config *c
        struct rte_ml_dev_info dev_info;
        struct cn10k_ml_model *model;
        struct cn10k_ml_dev *mldev;
+       struct cn10k_ml_ocm *ocm;
        struct cn10k_ml_qp *qp;
-       uint32_t mz_size;
        uint16_t model_id;
+       uint32_t mz_size;
+       uint16_t tile_id;
        uint16_t qp_id;
        int ret;
 
@@ -250,6 +252,18 @@ cn10k_ml_dev_configure(struct rte_ml_dev *dev, const 
struct rte_ml_dev_config *c
        }
        dev->data->nb_models = conf->nb_models;
 
+       ocm = &mldev->ocm;
+       ocm->num_tiles = ML_CN10K_OCM_NUMTILES;
+       ocm->size_per_tile = ML_CN10K_OCM_TILESIZE;
+       ocm->page_size = ML_CN10K_OCM_PAGESIZE;
+       ocm->num_pages = ocm->size_per_tile / ocm->page_size;
+       ocm->mask_words = ocm->num_pages / (8 * sizeof(uint8_t));
+
+       for (tile_id = 0; tile_id < ocm->num_tiles; tile_id++)
+               ocm->tile_ocm_info[tile_id].last_wb_page = -1;
+
+       rte_spinlock_init(&ocm->lock);
+
        mldev->nb_models_loaded = 0;
        mldev->state = ML_CN10K_DEV_STATE_CONFIGURED;
 
@@ -416,6 +430,8 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct 
rte_ml_model_params *params,
        const struct plt_memzone *mz;
        size_t model_data_size;
        uint8_t *base_dma_addr;
+       uint16_t scratch_pages;
+       uint16_t wb_pages;
        uint64_t mz_size;
        uint16_t idx;
        bool found;
@@ -441,6 +457,11 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct 
rte_ml_model_params *params,
                return -ENOMEM;
        }
 
+       /* Get WB and scratch pages, check if model can be loaded. */
+       ret = cn10k_ml_model_ocm_pages_count(mldev, idx, params->addr, 
&wb_pages, &scratch_pages);
+       if (ret < 0)
+               return ret;
+
        /* Compute memzone size */
        metadata = (struct cn10k_ml_model_metadata *)params->addr;
        model_data_size = metadata->init_model.file_size + 
metadata->main_model.file_size +
@@ -478,6 +499,14 @@ cn10k_ml_model_load(struct rte_ml_dev *dev, struct 
rte_ml_model_params *params,
        /* Copy data from load to run. run address to be used by MLIP */
        rte_memcpy(model->addr.base_dma_addr_run, 
model->addr.base_dma_addr_load, model_data_size);
 
+       /* Initialize model_mem_map */
+       memset(&model->model_mem_map, 0, sizeof(struct cn10k_ml_ocm_model_map));
+       model->model_mem_map.ocm_reserved = false;
+       model->model_mem_map.tilemask = 0;
+       model->model_mem_map.wb_page_start = -1;
+       model->model_mem_map.wb_pages = wb_pages;
+       model->model_mem_map.scratch_pages = scratch_pages;
+
        plt_spinlock_init(&model->lock);
        model->state = ML_CN10K_MODEL_STATE_LOADED;
        dev->data->models[idx] = model;
diff --git a/drivers/ml/cnxk/meson.build b/drivers/ml/cnxk/meson.build
index 799e8f2470..393bc629b0 100644
--- a/drivers/ml/cnxk/meson.build
+++ b/drivers/ml/cnxk/meson.build
@@ -11,12 +11,14 @@ driver_sdk_headers = files(
         'cn10k_ml_dev.h',
         'cn10k_ml_ops.h',
         'cn10k_ml_model.h',
+        'cn10k_ml_ocm.h',
 )
 
 sources = files(
         'cn10k_ml_dev.c',
         'cn10k_ml_ops.c',
         'cn10k_ml_model.c',
+        'cn10k_ml_ocm.c',
 )
 
 deps += ['mldev', 'common_cnxk', 'kvargs', 'hash']
-- 
2.17.1

Reply via email to