Adds driver support for AIEML generation devices. The following modules
are enabled:
- Get tile type from location (support for new memory tile type)
- Clock state tracking and request and release of tiles

Signed-off-by: Gregory Williams <gregory.willi...@amd.com>
---
 drivers/accel/amd-ai-engine/Makefile          |   1 +
 drivers/accel/amd-ai-engine/ai-engine-aieml.c | 210 ++++++++++++++++++
 drivers/accel/amd-ai-engine/ai-engine-dev.c   |   2 +
 .../accel/amd-ai-engine/ai-engine-internal.h  |   2 +
 4 files changed, 215 insertions(+)
 create mode 100644 drivers/accel/amd-ai-engine/ai-engine-aieml.c

diff --git a/drivers/accel/amd-ai-engine/Makefile b/drivers/accel/amd-ai-engine/Makefile
index 9a830f7432d2..66cbce4705ea 100644
--- a/drivers/accel/amd-ai-engine/Makefile
+++ b/drivers/accel/amd-ai-engine/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_DRM_ACCEL_AMDAIE)  += amd-aie.o
 
 amd-aie-$(CONFIG_DRM_ACCEL_AMDAIE) := \
        ai-engine-aie.o         \
+       ai-engine-aieml.o       \
        ai-engine-aperture.o    \
        ai-engine-clock.o       \
        ai-engine-dev.o         \
diff --git a/drivers/accel/amd-ai-engine/ai-engine-aieml.c b/drivers/accel/amd-ai-engine/ai-engine-aieml.c
new file mode 100644
index 000000000000..328688942a6a
--- /dev/null
+++ b/drivers/accel/amd-ai-engine/ai-engine-aieml.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD AI Engine driver AIEML device specific implementation
+ *
+ * Copyright(C) 2025 Advanced Micro Devices, Inc. All rights reserved.
+ */
+
+#include <linux/amd-ai-engine.h>
+#include <linux/bitmap.h>
+#include <linux/device.h>
+#include <linux/firmware/xlnx-zynqmp.h>
+#include <linux/io.h>
+
+#include "ai-engine-internal.h"
+
+/*
+ * Shift values stored in the array_shift, col_shift and row_shift fields of
+ * struct aie_device by aieml_device_init().
+ */
+#define AIEML_ARRAY_SHIFT      32U
+#define AIEML_COL_SHIFT                25U
+#define AIEML_ROW_SHIFT                20U
+
+/* NOTE(review): not referenced in the visible part of this file. */
+#define NUM_TYPES_OF_MEM       3U
+
+/* NOTE(review): module counts; not referenced in the visible part of this file. */
+#define NUM_MODS_CORE_TILE     2U
+#define NUM_MODS_MEM_TILE      1U
+#define NUM_MODS_SHIMPL_TILE   1U
+
+/*
+ * Register offsets
+ *
+ * AIEML_SHIMPL_COLCLOCK_CTRL_REGOFF is the in-tile offset handed to
+ * aie_cal_regoff() when aieml_scan_part_clocks() reads the column clock
+ * control register.
+ */
+#define AIEML_SHIMPL_COLCLOCK_CTRL_REGOFF              0x000fff20U
+
+/*
+ * Register masks
+ *
+ * AIEML_SHIMPL_COLCLOCK_CTRL_MASK selects the bits that
+ * aieml_scan_part_clocks() tests in the column clock control register.
+ * NOTE(review): AIEML_SHIMPL_COLRESET_CTRL_MASK is not referenced in the
+ * visible part of this file.
+ */
+#define AIEML_SHIMPL_COLRESET_CTRL_MASK                        GENMASK(1, 0)
+#define AIEML_SHIMPL_COLCLOCK_CTRL_MASK                        GENMASK(1, 0)
+
+/* aieml_get_tile_type() - get the type of the tile at a location
+ * @adev: AI engine device
+ * @loc: tile location
+ *
+ * Row 0 holds the shim tiles: within every group of four columns the first
+ * two are SHIMPL and the remaining two are SHIMNOC. Rows 1 up to the number
+ * of memory tile rows are memory tiles, every row above that is a core tile.
+ *
+ * Return: one of the AIE_TILE_TYPE_* values.
+ */
+static u32 aieml_get_tile_type(struct aie_device *adev,
+                              struct aie_location *loc)
+{
+       u8 mem_rows = adev->ttype_attr[AIE_TILE_TYPE_MEMORY].num_rows;
+
+       /* Shim row: the column position selects PL vs. NOC */
+       if (loc->row == 0) {
+               if ((loc->col % 4) < 2)
+                       return AIE_TILE_TYPE_SHIMPL;
+
+               return AIE_TILE_TYPE_SHIMNOC;
+       }
+
+       /* Non-shim rows: memory tiles first, core tiles above them */
+       if (loc->row <= mem_rows)
+               return AIE_TILE_TYPE_MEMORY;
+
+       return AIE_TILE_TYPE_TILE;
+}
+
+/* aieml_scan_part_clocks() - scan clocks of a partition
+ * @apart: AI engine partition
+ *
+ * Clears the partition clock state bitmap, then reads the column clock
+ * control register of every column in the partition range. For each column
+ * whose clock control bits are set, the (size.row - 1) bits belonging to that
+ * column are set in the clock state bitmap. Finally the clock state bitmap is
+ * copied into the tiles-in-use bitmap.
+ *
+ * Return: 0 for success, negative value for errors.
+ */
+static int aieml_scan_part_clocks(struct aie_partition *apart)
+{
+       struct aie_aperture *aperture = apart->aperture;
+       struct aie_range *range = &apart->range;
+       struct aie_device *adev = apart->adev;
+       struct aie_location loc;
+       int ret;
+
+       /* Clear the bitmap of cores and memories clock state */
+       aie_resource_put_region(&apart->cores_clk_state, 0,
+                               apart->cores_clk_state.total);
+
+       /*
+        * loc is passed by value to aie_cal_regoff(), so the row must be
+        * initialised before the register offset is computed. The column
+        * clock control register is read from the shim tile, which
+        * aieml_get_tile_type() places in row 0.
+        */
+       loc.row = 0;
+
+       /*
+        * In aieml if clock buffer on shim tile is enabled, the clock for all
+        * tiles in the same column is enabled.
+        */
+       for (loc.col = range->start.col;
+            loc.col < range->start.col + range->size.col;
+            loc.col++) {
+               void __iomem *va;
+               u32 val, nbitpos;
+
+               /* First bit of this column; shim row has no bit */
+               nbitpos = (loc.col - range->start.col) * (range->size.row - 1);
+
+               va = aperture->base +
+                    aie_cal_regoff(adev, loc,
+                                   AIEML_SHIMPL_COLCLOCK_CTRL_REGOFF);
+               val = readl(va);
+
+               /* Column clock is off: leave the column's bits cleared */
+               if (!(val & AIEML_SHIMPL_COLCLOCK_CTRL_MASK))
+                       continue;
+
+               ret = aie_resource_set(&apart->cores_clk_state, nbitpos,
+                                      range->size.row - 1);
+               if (ret) {
+                       dev_err(aperture->dev,
+                               "failed to set clock state bitmaps for column %u",
+                               loc.col);
+                       return ret;
+               }
+       }
+       /*
+        * Set the tiles in use bitmap.
+        * In case of scanning, tiles which are powered on are considered as
+        * tiles in use.
+        */
+       bitmap_copy(apart->tiles_inuse.bitmap, apart->cores_clk_state.bitmap,
+                   apart->tiles_inuse.total);
+
+       return 0;
+}
+
+/* aieml_set_part_clocks() - set clocks of a partition
+ * @apart: AI engine partition
+ *
+ * For every column of the partition, checks whether any non-shim tile of the
+ * column is marked in the tiles-in-use bitmap. If so, the column clock buffer
+ * is enabled and all bits of the column are set in both the tiles-in-use and
+ * the clock state bitmaps. Otherwise the column clock buffer is disabled and
+ * the column's bits are cleared in both bitmaps.
+ *
+ * Return: 0 for success, negative value for errors.
+ */
+static int aieml_set_part_clocks(struct aie_partition *apart)
+{
+       struct aie_aperture *aperture = apart->aperture;
+       struct aie_range *range = &apart->range;
+       u32 node_id = apart->adev->pm_node_id;
+       /* Bits per column: one per row, excluding the first (shim) row */
+       u32 col_bits = range->size.row - 1;
+       struct aie_location loc;
+       int ret;
+
+       for (loc.col = range->start.col;
+            loc.col < range->start.col + range->size.col;
+            loc.col++) {
+               u32 startbit = (loc.col - range->start.col) * col_bits;
+               bool col_inuse = false;
+
+               for (loc.row = range->start.row + 1;
+                    loc.row < range->start.row + range->size.row;
+                    loc.row++) {
+                       /*
+                        * Index relative to the partition's first row so the
+                        * tested bit stays inside this column's
+                        * [startbit, startbit + col_bits) window.
+                        */
+                       u32 nbitpos = startbit +
+                                     (loc.row - range->start.row - 1);
+
+                       if (aie_resource_testbit(&apart->tiles_inuse,
+                                                nbitpos)) {
+                               col_inuse = true;
+                               break;
+                       }
+               }
+
+               if (col_inuse) {
+                       ret = zynqmp_pm_aie_operation(node_id, loc.col,
+                                                     1,
+                                                     XILINX_AIE_OPS_ENB_COL_CLK_BUFF);
+                       if (ret < 0) {
+                               dev_err(aperture->dev,
+                                       "failed to enable clock for column: %d",
+                                       loc.col);
+                               return ret;
+                       }
+
+                       /*
+                        * Update the bitmaps one after the other so that the
+                        * error code of the failing call is returned
+                        * unmodified; OR-ing two negative errnos together
+                        * would produce an unrelated error value.
+                        */
+                       ret = aie_resource_set(&apart->tiles_inuse,
+                                              startbit, col_bits);
+                       if (!ret)
+                               ret = aie_resource_set(&apart->cores_clk_state,
+                                                      startbit, col_bits);
+                       if (ret) {
+                               dev_err(aperture->dev,
+                                       "failed to set bitmaps for column: %d",
+                                       loc.col);
+                               return ret;
+                       }
+               } else {
+                       ret = zynqmp_pm_aie_operation(node_id, loc.col,
+                                                     1,
+                                                     XILINX_AIE_OPS_DIS_COL_CLK_BUFF);
+                       if (ret < 0) {
+                               dev_err(aperture->dev,
+                                       "failed to disable clock for column: %d",
+                                       loc.col);
+                               return ret;
+                       }
+
+                       /* Same sequencing as above to keep the errno intact */
+                       ret = aie_resource_clear(&apart->tiles_inuse,
+                                                startbit, col_bits);
+                       if (!ret)
+                               ret = aie_resource_clear(&apart->cores_clk_state,
+                                                        startbit, col_bits);
+                       if (ret) {
+                               dev_err(aperture->dev,
+                                       "failed to clear bitmaps for column: %d",
+                                       loc.col);
+                               return ret;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+/* Tile operations installed for AIEML devices by aieml_device_init() */
+static const struct aie_tile_operations aieml_ops = {
+       .set_part_clocks        = aieml_set_part_clocks,
+       .scan_part_clocks       = aieml_scan_part_clocks,
+       .get_tile_type          = aieml_get_tile_type,
+};
+
+/**
+ * aieml_device_init() - Initialize the AIEML specific parts of an AI engine
+ *                      device structure
+ * @adev: AI engine device
+ *
+ * This function initializes the device version specific elements of the AI
+ * engine device structure: the AIEML tile operations and the array, column
+ * and row shifts used for register addressing.
+ */
+void aieml_device_init(struct aie_device *adev)
+{
+       /* AIEML specific tile operations */
+       adev->ops = &aieml_ops;
+
+       /* Register addressing shifts */
+       adev->row_shift = AIEML_ROW_SHIFT;
+       adev->col_shift = AIEML_COL_SHIFT;
+       adev->array_shift = AIEML_ARRAY_SHIFT;
+}
diff --git a/drivers/accel/amd-ai-engine/ai-engine-dev.c b/drivers/accel/amd-ai-engine/ai-engine-dev.c
index ba28257cbd04..f713d38ff8c3 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-dev.c
+++ b/drivers/accel/amd-ai-engine/ai-engine-dev.c
@@ -154,6 +154,8 @@ static int amd_ai_engine_probe(struct platform_device *pdev)
        adev->dev_gen = aie_gen;
        if (aie_gen == AIE_DEVICE_GEN_AIE) {
                aie_device_init(adev);
+       } else if (aie_gen == AIE_DEVICE_GEN_AIEML) {
+               aieml_device_init(adev);
        } else {
                dev_err(&pdev->dev, "Invalid device generation");
                return -EINVAL;
diff --git a/drivers/accel/amd-ai-engine/ai-engine-internal.h b/drivers/accel/amd-ai-engine/ai-engine-internal.h
index 495d56d5f993..31a45575cc43 100644
--- a/drivers/accel/amd-ai-engine/ai-engine-internal.h
+++ b/drivers/accel/amd-ai-engine/ai-engine-internal.h
@@ -19,6 +19,7 @@
 #include <linux/platform_device.h>
 
 #define AIE_DEVICE_GEN_AIE     1U
+#define AIE_DEVICE_GEN_AIEML   2U
 
 #define KBYTES(n)              ((n) * SZ_1K)
 
@@ -246,6 +247,7 @@ static inline u32 aie_cal_regoff(struct aie_device *adev,
 }
 
 void aie_device_init(struct aie_device *adev);
+void aieml_device_init(struct aie_device *adev);
 struct aie_partition *
 aie_aperture_request_part(struct aie_aperture *aperture,
                          struct aie_partition_req *req);
-- 
2.34.1

Reply via email to