Create a tile memory information structure that stores the size and offset of the core data memory and program memory and, on AIEML, of the memory tile memory as well.
Signed-off-by: Gregory Williams <gregory.willi...@amd.com>
---
 drivers/accel/amd-ai-engine/ai-engine-aie.c   | 39 +++++++++
 drivers/accel/amd-ai-engine/ai-engine-aieml.c | 47 ++++++++++
 .../accel/amd-ai-engine/ai-engine-internal.h  | 85 +++++++++++++------
 drivers/accel/amd-ai-engine/ai-engine-part.c  | 45 ++++++++++
 4 files changed, 192 insertions(+), 24 deletions(-)

diff --git a/drivers/accel/amd-ai-engine/ai-engine-aie.c b/drivers/accel/amd-ai-engine/ai-engine-aie.c
index 5e3cb44a16c8..056db0b7be0e 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-aie.c
+++ b/drivers/accel/amd-ai-engine/ai-engine-aie.c
@@ -16,6 +16,8 @@
 #define AIE_COL_SHIFT			23U
 #define AIE_ROW_SHIFT			18U
 
+#define NUM_TYPES_OF_MEM	2U
+
 /*
  * Register offsets
  */
@@ -41,6 +43,42 @@ static u32 aie_get_tile_type(struct aie_device *adev, struct aie_location *loc)
 	return AIE_TILE_TYPE_SHIMNOC;
 }
 
+static unsigned int aie_get_mem_info(struct aie_device *adev,
+				     struct aie_range *range,
+				     struct aie_part_mem *pmem)
+{
+	u8 start_row, num_rows;
+	unsigned int i;
+
+	if (range->start.row + range->size.row <= 1) {
+		/* SHIM row only, no memories in this range */
+		return 0;
+	}
+	if (!pmem)
+		return NUM_TYPES_OF_MEM;
+
+	for (i = 0; i < NUM_TYPES_OF_MEM; i++) {
+		struct aie_mem *mem = &pmem[i].mem;
+
+		memcpy(&mem->range, range, sizeof(*range));
+	}
+
+	start_row = adev->ttype_attr[AIE_TILE_TYPE_TILE].start_row;
+	num_rows = adev->ttype_attr[AIE_TILE_TYPE_TILE].num_rows;
+	/* Setup tile data memory information */
+	pmem[0].mem.offset = 0;
+	pmem[0].mem.size = KBYTES(32);
+	pmem[0].mem.range.start.row = start_row;
+	pmem[0].mem.range.size.row = num_rows;
+	/* Setup program memory information */
+	pmem[1].mem.offset = 0x20000;
+	pmem[1].mem.size = KBYTES(16);
+	pmem[1].mem.range.start.row = start_row;
+	pmem[1].mem.range.size.row = num_rows;
+
+	return NUM_TYPES_OF_MEM;
+}
+
 /* aie_scan_part_clocks() - scan clocks of a partition
  * @apart: AI engine partition
  *
@@ -258,6 +296,7 @@ static int aie_set_part_clocks(struct aie_partition *apart)
 }
 static const struct aie_tile_operations aie_ops = {
 	.get_tile_type = aie_get_tile_type,
+	.get_mem_info = aie_get_mem_info,
 	.scan_part_clocks = aie_scan_part_clocks,
 	.set_part_clocks = aie_set_part_clocks,
 };
diff --git a/drivers/accel/amd-ai-engine/ai-engine-aieml.c b/drivers/accel/amd-ai-engine/ai-engine-aieml.c
index 328688942a6a..7730609ff7c0 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-aieml.c
+++ b/drivers/accel/amd-ai-engine/ai-engine-aieml.c
@@ -50,6 +50,52 @@ static u32 aieml_get_tile_type(struct aie_device *adev,
 	return AIE_TILE_TYPE_SHIMNOC;
 }
 
+static unsigned int aieml_get_mem_info(struct aie_device *adev,
+				       struct aie_range *range,
+				       struct aie_part_mem *pmem)
+{
+	u8 start_row, num_rows;
+	unsigned int i;
+
+	if (range->start.row + range->size.row <= 1) {
+		/* SHIM row only, no memories in this range */
+		return 0;
+	}
+
+	if (!pmem)
+		return NUM_TYPES_OF_MEM;
+
+	for (i = 0; i < NUM_TYPES_OF_MEM; i++) {
+		struct aie_mem *mem = &pmem[i].mem;
+
+		memcpy(&mem->range, range, sizeof(*range));
+	}
+
+	start_row = adev->ttype_attr[AIE_TILE_TYPE_TILE].start_row;
+	num_rows = adev->ttype_attr[AIE_TILE_TYPE_TILE].num_rows;
+	/* Setup tile data memory information */
+	pmem[0].mem.offset = 0;
+	pmem[0].mem.size = KBYTES(64);
+	pmem[0].mem.range.start.row = start_row;
+	pmem[0].mem.range.size.row = num_rows;
+
+	/* Setup program memory information */
+	pmem[1].mem.offset = 0x20000;
+	pmem[1].mem.size = KBYTES(16);
+	pmem[1].mem.range.start.row = start_row;
+	pmem[1].mem.range.size.row = num_rows;
+
+	start_row = adev->ttype_attr[AIE_TILE_TYPE_MEMORY].start_row;
+	num_rows = adev->ttype_attr[AIE_TILE_TYPE_MEMORY].num_rows;
+	/* Memory tile memory info; pmem[2] needs NUM_TYPES_OF_MEM >= 3 */
+	pmem[2].mem.offset = 0;
+	pmem[2].mem.size = KBYTES(512);
+	pmem[2].mem.range.start.row = start_row;
+	pmem[2].mem.range.size.row = num_rows;
+
+	return NUM_TYPES_OF_MEM;
+}
+
 /* aieml_scan_part_clocks() - scan clocks of a partition
  * @apart: AI engine partition
  *
@@ -188,6 +234,7 @@ static int aieml_set_part_clocks(struct aie_partition *apart)
 
 static const struct aie_tile_operations aieml_ops = {
 	.get_tile_type = aieml_get_tile_type,
+	.get_mem_info = aieml_get_mem_info,
 	.scan_part_clocks = aieml_scan_part_clocks,
 	.set_part_clocks = aieml_set_part_clocks,
 };
diff --git a/drivers/accel/amd-ai-engine/ai-engine-internal.h b/drivers/accel/amd-ai-engine/ai-engine-internal.h
index 31a45575cc43..13a39c4e3331 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-internal.h
+++ b/drivers/accel/amd-ai-engine/ai-engine-internal.h
@@ -68,30 +68,6 @@ struct aie_device;
 struct aie_partition;
 struct aie_aperture;
 
-/**
- * struct aie_tile_operations - AI engine device operations
- * @get_tile_type: get type of tile based on tile operation
- * @scan_part_clocks: scan partition modules to check whether the modules are
- *		      clock gated or not, and update the soft clock states
- *		      structure. It is required to be called when the partition
- *		      is requested so that the driver knows which modules are
- *		      clock gated when the partition is requested. This function
- *		      expects the caller to apply partition lock before calling
- *		      this function.
- * @set_part_clocks: set partition modules clocks gate registers based on the
- *		     partition clock states bitmap. This function expects the
- *		     caller to apply partition lock before calling this
- *		     function. The caller function will need to set the bitmap
- *		     on which tiles are required to be clocked on.
- *		     Different AI engine device version has its own device
- *		     operation.
- */
-struct aie_tile_operations {
-	u32 (*get_tile_type)(struct aie_device *adev, struct aie_location *loc);
-	int (*scan_part_clocks)(struct aie_partition *apart);
-	int (*set_part_clocks)(struct aie_partition *apart);
-};
-
 /**
  * struct aie_resource - AI engine resource structure
  * @bitmap: resource bitmap
@@ -112,6 +88,37 @@ struct aie_range {
 	struct aie_location size;
 };
 
+/**
+ * struct aie_mem - AIE memory information
+ * @range: range of tiles of the memory
+ * @offset: register offset within a tile of the memory
+ * @size: size of the memory in one tile
+ */
+struct aie_mem {
+	struct aie_range range;
+	__kernel_size_t offset;
+	__kernel_size_t size;
+};
+
+/**
+ * struct aie_part_mem - AI engine partition memory information structure
+ * @apart: AI engine partition
+ * @mem: memory information of a type of memory
+ * @size: size of the total memories in the partition
+ *
+ * This structure is to keep the information of a type of memory in a
+ * partition. The memory information will be stored in @mem property.
+ * The following information will be kept:
+ *  * memory start address offset within a tile
+ *  * memory size
+ *  * what tiles contain this type of memory
+ */
+struct aie_part_mem {
+	struct aie_partition *apart;
+	struct aie_mem mem;
+	size_t size;
+};
+
 /**
  * struct aie_tile_attr - AI engine device tile type attributes
  * @start_row: start row
@@ -126,6 +133,34 @@ struct aie_tile_attr {
 	const enum aie_module_type *mods;
 };
 
+/**
+ * struct aie_tile_operations - AI engine device operations
+ * @get_tile_type: get type of tile based on tile operation
+ * @get_mem_info: get different types of memories information
+ * @scan_part_clocks: scan partition modules to check whether the modules are
+ *		      clock gated or not, and update the soft clock states
+ *		      structure. It is required to be called when the partition
+ *		      is requested so that the driver knows which modules are
+ *		      clock gated when the partition is requested. This function
+ *		      expects the caller to apply partition lock before calling
+ *		      this function.
+ * @set_part_clocks: set partition modules clocks gate registers based on the
+ *		     partition clock states bitmap. This function expects the
+ *		     caller to apply partition lock before calling this
+ *		     function. The caller function will need to set the bitmap
+ *		     on which tiles are required to be clocked on.
+ *		     Different AI engine device version has its own device
+ *		     operation.
+ */
+struct aie_tile_operations {
+	u32 (*get_tile_type)(struct aie_device *adev, struct aie_location *loc);
+	unsigned int (*get_mem_info)(struct aie_device *adev,
+				     struct aie_range *range,
+				     struct aie_part_mem *pmem);
+	int (*scan_part_clocks)(struct aie_partition *apart);
+	int (*set_part_clocks)(struct aie_partition *apart);
+};
+
 /**
  * struct aie_device - AI engine device structure
  * @apertures: list of apertures
@@ -188,6 +223,7 @@ struct aie_aperture {
  * @range: range of partition
  * @cores_clk_state: bitmap to indicate the power state of core and mem tiles
  * @tiles_inuse: bitmap to indicate if a tile is in use
+ * @pmems: pointer to partition memories types
  * @mlock: protection for AI engine partition operations
  * @freq_req: required frequency
  */
@@ -198,6 +234,7 @@ struct aie_partition {
 	struct aie_range range;
 	struct aie_resource cores_clk_state;
 	struct aie_resource tiles_inuse;
+	struct aie_part_mem *pmems;
 	struct mutex mlock; /* protection for AI engine partition operations */
 	u64 freq_req;
 };
diff --git a/drivers/accel/amd-ai-engine/ai-engine-part.c b/drivers/accel/amd-ai-engine/ai-engine-part.c
index 83099cb60161..878597eff202 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-part.c
+++ b/drivers/accel/amd-ai-engine/ai-engine-part.c
@@ -12,6 +12,44 @@
 
 #include "ai-engine-internal.h"
 
+/**
+ * aie_part_create_mems_info() - creates array to store the AI engine partition
+ *				 different memories types information
+ * @apart: AI engine partition
+ *
+ * Return: 0 for success, negative value for failure
+ *
+ * This function will create array to store the information of different
+ * memories types in the partition. This array is stored in @apart->pmems.
+ */
+static int aie_part_create_mems_info(struct aie_partition *apart)
+{
+	unsigned int i, num_mems;
+
+	num_mems = apart->adev->ops->get_mem_info(apart->adev, &apart->range,
+						  NULL);
+	if (!num_mems)
+		return 0;
+
+	apart->pmems = devm_kcalloc(apart->aperture->dev, num_mems,
+				    sizeof(struct aie_part_mem),
+				    GFP_KERNEL);
+	if (!apart->pmems)
+		return -ENOMEM;
+
+	apart->adev->ops->get_mem_info(apart->adev, &apart->range,
+				       apart->pmems);
+	for (i = 0; i < num_mems; i++) {
+		struct aie_mem *mem = &apart->pmems[i].mem;
+
+		apart->pmems[i].apart = apart;
+		apart->pmems[i].size = mem->size *
+				       mem->range.size.col *
+				       mem->range.size.row;
+	}
+	return 0;
+}
+
 /**
  * aie_part_release() - release an AI engine partition instance
  * @apart: AI engine partition device
@@ -29,6 +67,7 @@ void aie_part_release(struct aie_partition *apart)
 	aie_resource_uninitialize(&apart->cores_clk_state);
 	aie_resource_uninitialize(&apart->tiles_inuse);
 	list_del(&apart->node);
+	devm_kfree(aperture->dev, apart->pmems);
 	devm_kfree(aperture->dev, apart);
 	mutex_unlock(&aperture->mlock);
 }
@@ -64,6 +103,12 @@ struct aie_partition *aie_part_create(struct aie_aperture *aperture,
 	apart->range.start.row = aperture->range.start.row;
 	apart->range.size.row = aperture->range.size.row;
 
+	ret = aie_part_create_mems_info(apart);
+	if (ret) {
+		dev_err(aperture->dev, "failed to create tile memory information.\n");
+		return ERR_PTR(ret);
+	}
+
 	/* SHIM row always enabled so it is not needed in the bitmap */
 	num_tiles = apart->range.size.col * (apart->range.size.row - 1);
 	ret = aie_resource_initialize(&apart->cores_clk_state, num_tiles);
-- 
2.34.1