On 4/9/2025 11:30 AM, Nipun Gupta wrote:
The AMD PKI accelerator driver provides a accel interface to interact

"an accel"

with the device for offloading and accelerating asymmetric crypto
operations.

Signed-off-by: Nipun Gupta <nipun.gu...@amd.com>
---

Changes RFC->v2:
- moved from misc to accel
- added architecture and compile test dependency in Kconfig
- removed sysfs (and added debugfs in new patch 3/3)
- fixed platform compat
- removed redundant resource index 1 configuration (which was there in
   RFC patch)

  MAINTAINERS                     |   2 +
  drivers/accel/Kconfig           |   1 +
  drivers/accel/Makefile          |   1 +
  drivers/accel/amdpk/Kconfig     |  18 +
  drivers/accel/amdpk/Makefile    |   8 +
  drivers/accel/amdpk/amdpk_drv.c | 736 ++++++++++++++++++++++++++++++++
  drivers/accel/amdpk/amdpk_drv.h | 271 ++++++++++++
  include/uapi/drm/amdpk.h        |  49 +++
  8 files changed, 1086 insertions(+)
  create mode 100644 drivers/accel/amdpk/Kconfig
  create mode 100644 drivers/accel/amdpk/Makefile
  create mode 100644 drivers/accel/amdpk/amdpk_drv.c
  create mode 100644 drivers/accel/amdpk/amdpk_drv.h
  create mode 100644 include/uapi/drm/amdpk.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 11f8815daa77..cdc305a206aa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1161,6 +1161,8 @@ L:        dri-devel@lists.freedesktop.org
  S:    Maintained
  T:    git https://gitlab.freedesktop.org/drm/misc/kernel.git
  F:    Documentation/devicetree/bindings/accel/amd,versal-net-pki.yaml
+F:     drivers/accel/amdpk/
+F:     include/uapi/drm/amdpk.h

  AMD PMC DRIVER
  M:    Shyam Sundar S K <shyam-sundar....@amd.com>
diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig
index 5b9490367a39..5632c6c62c15 100644
--- a/drivers/accel/Kconfig
+++ b/drivers/accel/Kconfig
@@ -28,5 +28,6 @@ source "drivers/accel/amdxdna/Kconfig"
  source "drivers/accel/habanalabs/Kconfig"
  source "drivers/accel/ivpu/Kconfig"
  source "drivers/accel/qaic/Kconfig"
+source "drivers/accel/amdpk/Kconfig"

Alphabetical order


  endif
diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile
index a301fb6089d4..caea6d636ac8 100644
--- a/drivers/accel/Makefile
+++ b/drivers/accel/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_DRM_ACCEL_AMDXDNA)         += amdxdna/
  obj-$(CONFIG_DRM_ACCEL_HABANALABS)    += habanalabs/
  obj-$(CONFIG_DRM_ACCEL_IVPU)          += ivpu/
  obj-$(CONFIG_DRM_ACCEL_QAIC)          += qaic/
+obj-$(CONFIG_DRM_ACCEL_AMDPK)          += amdpk/

Alphabetical order

diff --git a/drivers/accel/amdpk/amdpk_drv.c b/drivers/accel/amdpk/amdpk_drv.c
new file mode 100644
index 000000000000..17c328d03db8
--- /dev/null
+++ b/drivers/accel/amdpk/amdpk_drv.c
@@ -0,0 +1,736 @@
+// SPDX-License-Identifier: GPL-2.0

Deprecated SPDX tag.  Checkpatch will catch this.

+/*
+ * Copyright (c) 2018-2021 Silex Insight sa
+ * Copyright (c) 2018-2021 Beerten Engineering scs
+ * Copyright (c) 2025 Advanced Micro Devices, Inc.
+ */
+
+/*
+ * Device Overview
+ * ===============
+ * AMD PKI accelerator is a device on AMD versal-net to execute public
+ * key asymmetric crypto operations like ECDSA, ECDH, RSA etc. with high
+ * performance. The driver provides accel interface to applications for
+ * configuring the device and performing the required operations. AMD PKI
+ * device comprises of multiple Barco Silex ba414 PKI engines bundled together,
+ * and providing a queue based interface to interact with these devices on AMD
+ * versal-net.
+ *
+ * Following figure provides the brief overview of the device interface with
+ * the software:
+ *
+ * +------------------+
+ * |    Software      |
+ * +------------------+
+ *     |          |
+ *     |          v
+ *     |     +-----------------------------------------------------------+
+ *     |     |                     RAM                                   |
+ *     |     |  +----------------------------+   +---------------------+ |
+ *     |     |  |           RQ pages         |   |       CQ pages      | |
+ *     |     |  | +------------------------+ |   | +-----------------+ | |
+ *     |     |  | |   START (cmd)          | |   | | req_id | status | | |
+ *     |     |  | |   TFRI (addr, sz)---+  | |   | | req_id | status | | |
+ *     |     |  | | +-TFRO (addr, sz)   |  | |   | | ...             | | |
+ *     |     |  | | | NTFY (req_id)     |  | |   | +-----------------+ | |
+ *     |     |  | +-|-------------------|--+ |   |                     | |
+ *     |     |  |   |                   v    |   +---------------------+ |
+ *     |     |  |   |         +-----------+  |                           |
+ *     |     |  |   |         | input     |  |                           |
+ *     |     |  |   |         | data      |  |                           |
+ *     |     |  |   v         +-----------+  |                           |
+ *     |     |  |  +----------------+        |                           |
+ *     |     |  |  |  output data   |        |                           |
+ *     |     |  |  +----------------+        |                           |
+ *     |     |  +----------------------------+                           |
+ *     |     |                                                           |
+ *     |     +-----------------------------------------------------------+
+ *     |
+ *     |
+ * +---|----------------------------------------------------+
+ * |   v                AMD PKI device                      |
+ * |  +-------------------+     +------------------------+  |
+ * |  | New request FIFO  | --> |       PK engines       |  |
+ * |  +-------------------+     +------------------------+  |
+ * +--------------------------------------------------------+
+ *
+ * To perform a crypto operation, the software writes a sequence of 
descriptors,
+ * into the RQ memory. This includes input data and designated location for the
+ * output data. After preparing the request, request offset (from the RQ memory
+ * region) is written into the NEW_REQUEST register. Request is then stored in 
a
+ * common hardware FIFO shared among all RQs.
+ *
+ * When a PK engine becomes available, device pops the request from the FIFO 
and
+ * fetches the descriptors. It DMAs the input data from RQ memory and executes
+ * the necessary computations. After computation is complete, the device writes
+ * output data back to RAM via DMA. Device then writes a new entry in CQ ring
+ * buffer in RAM, indicating completion of the request. Device also generates
+ * an interrupt for notifying completion to the software.
+ */

Feels like this would be better served in Documentation

+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/eventfd.h>
+#include <drm/drm_accel.h>
+#include <drm/drm_ioctl.h>

Alphabetical order

+
+#include "amdpk_drv.h"
+
+#define DRIVER_NAME "amdpk"
+
+static void amdpk_init_cq(struct amdpk_dev *pkdev, struct amdpk_cq *cq,
+                         int szcode, char *base)
+{
+       cq->pkdev = pkdev;
+       cq->generation = 1;
+       cq->szcode = szcode;
+       cq->base = (u32 *)base;
+       cq->tail = 0;
+}
+
+static int amdpk_pop_cq(struct amdpk_cq *cq, int *rid)
+{
+       u32 status = CQ_STATUS_VALID;
+       unsigned int sz;
+       u32 completion;
+
+       completion = cq->base[cq->tail + 1];
+       if ((completion & CQ_GENERATION_BIT) != cq->generation)
+               return CQ_STATUS_INVALID;
+
+       *rid = (completion >> 16) & 0xffff;
+       /* read memory barrier: to avoid a race condition, the status field 
should
+        * not be read before the completion generation bit. Otherwise we could
+        * get stale outdated status data.
+        */

Incorrect comment format.

+       rmb();

Shouldn't you be using readl()?

+       status |= cq->base[cq->tail];
+       /* advance completion queue tail */
+       cq->tail += 2;
+       sz = 1 << (cq->szcode - 2);
+       if (cq->tail >= sz) {
+               cq->tail = 0;
+               cq->generation ^= 1; /* invert generation bit */
+       }
+
+       /* evaluate status from the completion queue */
+       if (completion & CQ_COMPLETION_BIT)
+               status |= CQ_COMPLETION_ERROR;
+
+       return status;
+}
+
+static const struct file_operations amdpk_accel_fops = {
+       .owner          = THIS_MODULE,
+       .open           = accel_open,
+       .release        = drm_release,
+       .unlocked_ioctl = drm_ioctl,
+       .compat_ioctl   = drm_compat_ioctl,
+       .llseek         = noop_llseek,
+       .mmap           = amdpk_accel_mmap,
+};

DEFINE_DRM_ACCEL_FOPS ?

diff --git a/include/uapi/drm/amdpk.h b/include/uapi/drm/amdpk.h
new file mode 100644
index 000000000000..e5e18fdbc2c4
--- /dev/null
+++ b/include/uapi/drm/amdpk.h
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025 Advanced Micro Devices, Inc.
+ */
+
+#ifndef __AMDPK_H__
+#define __AMDPK_H__
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define MAX_PK_REQS            256
+
+struct amdpk_info {
+       /** maximum available queue depth */

This doesn't look like valid kerneldoc

+       unsigned int avail_qdepth;
+};

Doesn't look like this handles compat correctly. Did you read the documentation on creating ioctls?

Reply via email to