The AMD PKI accelerator driver provides an accel interface to interact with the device for offloading and accelerating asymmetric crypto operations.
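
As a rough sketch of the intended usage from userspace (illustrative only,
not part of this patch: it assumes the device enumerates as
/dev/accel/accel0, that the uapi header is installed as <drm/amdpk.h>, and
it omits all error handling):

	#include <fcntl.h>
	#include <sys/eventfd.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <unistd.h>
	#include <drm/amdpk.h>

	int main(void)
	{
		struct amdpk_info info = { 0 };
		struct amdpk_conf conf = { 0 };
		long psz = sysconf(_SC_PAGESIZE);
		int fd = open("/dev/accel/accel0", O_RDWR);
		unsigned int i;

		/* How many request slots are currently free device-wide? */
		ioctl(fd, DRM_IOCTL_AMDPK_GET_INFO, &info);

		/* Reserve 8 slots, with one eventfd per slot for completions. */
		conf.qdepth = 8;
		for (i = 0; i < conf.qdepth; i++)
			conf.eventfd[i] = eventfd(0, 0);
		ioctl(fd, DRM_IOCTL_AMDPK_SET_CONF, &conf);

		/* Map the per-queue control registers, then one status page
		 * followed by four request queue pages.
		 */
		void *regs = mmap(NULL, psz, PROT_READ | PROT_WRITE,
				  MAP_SHARED, fd, AMDPK_MMAP_REGS * psz);
		void *mem = mmap(NULL, 5 * psz, PROT_READ | PROT_WRITE,
				 MAP_SHARED, fd, AMDPK_MMAP_MEM * psz);

		/* Write request descriptors into the RQ pages, ring the
		 * doorbell through "regs", wait on the eventfds and read the
		 * per-request status words from the first mapped page.
		 */
		return 0;
	}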
Signed-off-by: Nipun Gupta <nipun.gu...@amd.com>
---
Changes RFC->v2:
- moved from misc to accel
- added architecture and compile test dependency in Kconfig
- removed sysfs (and added debugfs in new patch 3/3)
- fixed platform compat
- removed redundant resource index 1 configuration (which was there in
  RFC patch)

 MAINTAINERS                     |   2 +
 drivers/accel/Kconfig           |   1 +
 drivers/accel/Makefile          |   1 +
 drivers/accel/amdpk/Kconfig     |  18 +
 drivers/accel/amdpk/Makefile    |   8 +
 drivers/accel/amdpk/amdpk_drv.c | 736 ++++++++++++++++++++++++++++++++
 drivers/accel/amdpk/amdpk_drv.h | 271 ++++++++++++
 include/uapi/drm/amdpk.h        |  49 +++
 8 files changed, 1086 insertions(+)
 create mode 100644 drivers/accel/amdpk/Kconfig
 create mode 100644 drivers/accel/amdpk/Makefile
 create mode 100644 drivers/accel/amdpk/amdpk_drv.c
 create mode 100644 drivers/accel/amdpk/amdpk_drv.h
 create mode 100644 include/uapi/drm/amdpk.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 11f8815daa77..cdc305a206aa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1161,6 +1161,8 @@ L:	dri-devel@lists.freedesktop.org
 S:	Maintained
 T:	git https://gitlab.freedesktop.org/drm/misc/kernel.git
 F:	Documentation/devicetree/bindings/accel/amd,versal-net-pki.yaml
+F:	drivers/accel/amdpk/
+F:	include/uapi/drm/amdpk.h
 
 AMD PMC DRIVER
 M:	Shyam Sundar S K <shyam-sundar....@amd.com>
diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig
index 5b9490367a39..5632c6c62c15 100644
--- a/drivers/accel/Kconfig
+++ b/drivers/accel/Kconfig
@@ -28,5 +28,6 @@ source "drivers/accel/amdxdna/Kconfig"
 source "drivers/accel/habanalabs/Kconfig"
 source "drivers/accel/ivpu/Kconfig"
 source "drivers/accel/qaic/Kconfig"
+source "drivers/accel/amdpk/Kconfig"
 
 endif
diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile
index a301fb6089d4..caea6d636ac8 100644
--- a/drivers/accel/Makefile
+++ b/drivers/accel/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_DRM_ACCEL_AMDXDNA) += amdxdna/
 obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/
 obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/
 obj-$(CONFIG_DRM_ACCEL_QAIC) += qaic/
+obj-$(CONFIG_DRM_ACCEL_AMDPK) += amdpk/
diff --git a/drivers/accel/amdpk/Kconfig b/drivers/accel/amdpk/Kconfig
new file mode 100644
index 000000000000..c0b459bb66a7
--- /dev/null
+++ b/drivers/accel/amdpk/Kconfig
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Kconfig for AMD PKI accelerator for versal-net
+#
+
+config DRM_ACCEL_AMDPK
+	tristate "AMD PKI accelerator for versal-net"
+	depends on DRM_ACCEL
+	depends on ARM64 || COMPILE_TEST
+	help
+	  Enables the platform driver for the AMD PKI accelerator, which is
+	  designed for high-performance public key asymmetric crypto
+	  operations on AMD versal-net.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called amdpk.
+
+	  If unsure, say N.
diff --git a/drivers/accel/amdpk/Makefile b/drivers/accel/amdpk/Makefile
new file mode 100644
index 000000000000..826f43ccebdf
--- /dev/null
+++ b/drivers/accel/amdpk/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for AMD PKI accelerator for versal-net
+#
+
+obj-$(CONFIG_DRM_ACCEL_AMDPK) := amdpk.o
+
+amdpk-y := amdpk_drv.o
diff --git a/drivers/accel/amdpk/amdpk_drv.c b/drivers/accel/amdpk/amdpk_drv.c
new file mode 100644
index 000000000000..17c328d03db8
--- /dev/null
+++ b/drivers/accel/amdpk/amdpk_drv.c
@@ -0,0 +1,736 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018-2021 Silex Insight sa
+ * Copyright (c) 2018-2021 Beerten Engineering scs
+ * Copyright (c) 2025 Advanced Micro Devices, Inc.
+ */
+
+/*
+ * Device Overview
+ * ===============
+ * The AMD PKI accelerator is a device on AMD versal-net that executes public
+ * key asymmetric crypto operations such as ECDSA, ECDH and RSA with high
+ * performance. The driver provides an accel interface to applications for
+ * configuring the device and performing the required operations. The AMD PKI
+ * device comprises multiple Barco Silex ba414 PKI engines bundled together,
+ * exposed through a queue based interface on AMD versal-net.
+ *
+ * The following figure gives a brief overview of the device interface with
+ * the software:
+ *
+ *  +------------------+
+ *  |     Software     |
+ *  +------------------+
+ *    |      |
+ *    |      v
+ *    |  +-----------------------------------------------------------+
+ *    |  | RAM                                                       |
+ *    |  |  +----------------------------+  +---------------------+  |
+ *    |  |  | RQ pages                   |  | CQ pages            |  |
+ *    |  |  | +------------------------+ |  | +-----------------+ |  |
+ *    |  |  | | START (cmd)            | |  | | req_id | status | |  |
+ *    |  |  | | TFRI (addr, sz)---+    | |  | | req_id | status | |  |
+ *    |  |  | | +-TFRO (addr, sz) |    | |  | |       ...       | |  |
+ *    |  |  | | | NTFY (req_id)   |    | |  | +-----------------+ |  |
+ *    |  |  | +-|-----------------|----+ |  |                     |  |
+ *    |  |  |   |                 v      |  +---------------------+  |
+ *    |  |  |   |          +-----------+ |                           |
+ *    |  |  |   |          |   input   | |                           |
+ *    |  |  |   |          |   data    | |                           |
+ *    |  |  |   v          +-----------+ |                           |
+ *    |  |  | +----------------+         |                           |
+ *    |  |  | |  output data   |         |                           |
+ *    |  |  | +----------------+         |                           |
+ *    |  |  +----------------------------+                           |
+ *    |  |                                                           |
+ *    |  +-----------------------------------------------------------+
+ *    |
+ *    |
+ *  +-|--------------------------------------------------------+
+ *  | v                    AMD PKI device                      |
+ *  |  +-------------------+       +------------------------+  |
+ *  |  | New request FIFO  |  -->  |       PK engines       |  |
+ *  |  +-------------------+       +------------------------+  |
+ *  +----------------------------------------------------------+
+ *
+ * To perform a crypto operation, the software writes a sequence of
+ * descriptors into the RQ memory. This includes the input data and a
+ * designated location for the output data. After preparing the request, the
+ * request offset (from the RQ memory region) is written into the NEW_REQUEST
+ * register. The request is then stored in a common hardware FIFO shared
+ * among all RQs.
+ *
+ * When a PK engine becomes available, the device pops the request from the
+ * FIFO and fetches the descriptors. It DMAs the input data from the RQ
+ * memory and executes the necessary computations. After the computation is
+ * complete, the device writes the output data back to RAM via DMA. The
+ * device then writes a new entry into the CQ ring buffer in RAM, indicating
+ * completion of the request, and generates an interrupt to notify the
+ * software.
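+ *
+ * Completion queue entry layout (as consumed by amdpk_pop_cq() below): each
+ * entry is two 32-bit words. The first word carries the hardware status; the
+ * second carries the request id in bits [31:16], a completion-error flag in
+ * bit 1 and a generation bit in bit 0, which software compares with its own
+ * generation flag to detect valid new entries as the ring wraps around.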
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/dma-mapping.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/eventfd.h>
+#include <drm/drm_accel.h>
+#include <drm/drm_ioctl.h>
+
+#include "amdpk_drv.h"
+
+#define DRIVER_NAME "amdpk"
+
+static void amdpk_init_cq(struct amdpk_dev *pkdev, struct amdpk_cq *cq,
+			  int szcode, char *base)
+{
+	cq->pkdev = pkdev;
+	cq->generation = 1;
+	cq->szcode = szcode;
+	cq->base = (u32 *)base;
+	cq->tail = 0;
+}
+
+static int amdpk_pop_cq(struct amdpk_cq *cq, int *rid)
+{
+	u32 status = CQ_STATUS_VALID;
+	unsigned int sz;
+	u32 completion;
+
+	completion = cq->base[cq->tail + 1];
+	if ((completion & CQ_GENERATION_BIT) != cq->generation)
+		return CQ_STATUS_INVALID;
+
+	*rid = (completion >> 16) & 0xffff;
+	/* Read memory barrier: to avoid a race condition, the status field
+	 * must not be read before the completion generation bit; otherwise
+	 * we could read stale status data.
+	 */
+	rmb();
+	status |= cq->base[cq->tail];
+	/* advance completion queue tail */
+	cq->tail += 2;
+	sz = 1 << (cq->szcode - 2);
+	if (cq->tail >= sz) {
+		cq->tail = 0;
+		cq->generation ^= 1; /* invert generation bit */
+	}
+
+	/* evaluate status from the completion queue */
+	if (completion & CQ_COMPLETION_BIT)
+		status |= CQ_COMPLETION_ERROR;
+
+	return status;
+}
+
+static int amdpk_trigpos(struct amdpk_cq *cq)
+{
+	int trigpos;
+
+	/* Set trigger position on next completed operation */
+	trigpos = cq->tail / 2 + (cq->generation << (cq->szcode - 3));
+	trigpos++;
+	trigpos &= (1 << (cq->szcode - 2)) - 1;
+
+	return trigpos;
+}
+
+static void amdpk_cq_workfn(struct kthread_work *work)
+{
+	struct amdpk_work *pkwork;
+	struct amdpk_dev *pkdev;
+	struct amdpk_user *user;
+	int qid, rid, trigpos;
+	u32 status;
+
+	pkwork = to_amdpk_work(work);
+	pkdev = pkwork->pkdev;
+	qid = pkwork->qid;
+
+	user = pkwork->user;
+	status = amdpk_pop_cq(&pkdev->work[qid]->pk_cq, &rid);
+	if (status != CQ_STATUS_INVALID && rid < user->rq_entries) {
+		u32 *status_mem;
+
+		status_mem = (u32 *)user->stmem;
+		status_mem[rid] = status;
+		eventfd_signal(user->evfd_ctx[rid]);
+	}
+
+	trigpos = amdpk_trigpos(&pkdev->work[qid]->pk_cq);
+	pk_wrreg(pkdev->regs, REG_CTL_CQ_NTFY(user->qid), trigpos);
+}
+
+static irqreturn_t amdpk_cq_irq(int irq, void *dev)
+{
+	struct amdpk_dev *pkdev = (struct amdpk_dev *)dev;
+	u64 active = 0;
+	int i;
+
+	active = pk_rdreg(pkdev->regs, REG_PK_IRQ_STATUS);
+	pk_wrreg(pkdev->regs, REG_PK_IRQ_RESET, active);
+
+	for (i = 0; i < pkdev->max_queues && active; i++, active >>= 1) {
+		if (!(active & 1))
+			continue;
+		if (!pkdev->users[i])
+			continue;
+		kthread_queue_work(pkdev->work[i]->cq_wq, &pkdev->work[i]->cq_work);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void amdpk_free_rqmem(struct amdpk_dev *pkdev, struct amdpk_user *user)
+{
+	int pages = user->rq_pages;
+	int pagemult;
+	int i;
+
+	if (!pages)
+		return;
+
+	/* Must match the page multiplier computed in amdpk_mmap_mem() */
+	pagemult = (pages + 3) / 4;
+	for (i = 0; i < pages / pagemult; i++) {
+		if (!user->rqmem[i])
+			continue;
+		dma_free_coherent(pkdev->dev, PAGE_SIZE * pagemult,
+				  user->rqmem[i], user->physrq[i]);
+		user->rqmem[i] = NULL;
+	}
+}
+
+static int amdpk_accel_get_info(struct drm_device *dev, void *data, struct drm_file *fp)
+{
+	struct amdpk_user *user = fp->driver_priv;
+	struct amdpk_dev *pkdev = user->pkdev;
+	struct amdpk_info *info = data;
+
+	info->avail_qdepth = atomic_read(&pkdev->avail_qdepth);
+	return 0;
+}
+
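+/*
+ * Reserve conf->qdepth request slots from the device-wide pool, take a
+ * reference on one eventfd per slot, then allocate the completion queue page
+ * and its worker kthread and program the RQ/CQ registers of this user's
+ * queue. Called once per user from the AMDPK_SET_CONF ioctl.
+ */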
+static int amdpk_accel_configure(struct amdpk_user *user, struct amdpk_conf *conf)
+{
+	struct amdpk_dev *pkdev = user->pkdev;
+	struct amdpk_work *pkwork = NULL;
+	int qid = user->qid;
+	int trigpos, ret, i;
+	char wq_name[32];
+
+	i = atomic_sub_return(conf->qdepth, &pkdev->avail_qdepth);
+	if (i < 0) {
+		/* Not enough entries are present, give back the reserved entries. */
+		dev_err(user->pkdev->dev, "Out of descriptors\n");
+		atomic_add(conf->qdepth, &pkdev->avail_qdepth);
+		return -ENOSPC;
+	}
+	user->rq_entries = conf->qdepth;
+
+	for (i = 0; i < user->rq_entries; i++) {
+		if (conf->eventfd[i] <= 0) {
+			dev_err(user->pkdev->dev, "Invalid eventfd: %d\n", conf->eventfd[i]);
+			ret = -EINVAL;
+			goto fail;
+		}
+
+		user->evfd_ctx[i] = eventfd_ctx_fdget(conf->eventfd[i]);
+		if (IS_ERR(user->evfd_ctx[i])) {
+			dev_err(user->pkdev->dev, "Invalid eventfd: %d\n", conf->eventfd[i]);
+			ret = PTR_ERR(user->evfd_ctx[i]);
+			goto fail;
+		}
+	}
+
+	user->cqmem = dma_alloc_coherent(pkdev->dev, PAGE_SIZE, &user->physcq, GFP_KERNEL);
+	if (!user->cqmem) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	/* Initialize completion queue handler */
+	pkwork = pkdev->work[qid];
+	amdpk_init_cq(pkdev, &pkwork->pk_cq, __builtin_ctz(PAGE_SIZE), user->cqmem);
+
+	snprintf(wq_name, sizeof(wq_name), "cq_worker_%d", qid);
+	pkwork->cq_wq = kthread_create_worker(0, wq_name);
+	if (IS_ERR(pkwork->cq_wq)) {
+		ret = PTR_ERR(pkwork->cq_wq);
+		pkwork->cq_wq = NULL;
+		goto fail;
+	}
+	kthread_init_work(&pkwork->cq_work, amdpk_cq_workfn);
+
+	pk_wrreg(pkdev->regs, REG_CQ_CFG_IRQ_NR(qid), qid);
+	pk_wrreg(pkdev->regs, REG_CQ_CFG_ADDR(qid), user->physcq);
+	pk_wrreg(pkdev->regs, REG_CQ_CFG_SIZE(qid), PAGE_SHIFT);
+	pk_wrreg(pkdev->regs, REG_RQ_CFG_CQID(qid), qid);
+	pk_wrreg(pkdev->regs, REG_RQ_CFG_DEPTH(qid), user->rq_entries);
+
+	/* set trigger position for notifications */
+	trigpos = amdpk_trigpos(&pkwork->pk_cq);
+	pk_wrreg(pkdev->regs, REG_CTL_CQ_NTFY(qid), trigpos);
+
+	return 0;
+fail:
+	if (pkwork && pkwork->cq_wq) {
+		kthread_destroy_worker(pkwork->cq_wq);
+		pkwork->cq_wq = NULL;
+	}
+	if (user->cqmem) {
+		dma_free_coherent(pkdev->dev, PAGE_SIZE, user->cqmem, user->physcq);
+		user->cqmem = NULL;
+	}
+	/* Drop the eventfd references taken so far */
+	while (--i >= 0)
+		eventfd_ctx_put(user->evfd_ctx[i]);
+	atomic_add(user->rq_entries, &pkdev->avail_qdepth);
+	user->rq_entries = 0;
+
+	return ret;
+}
+
+static int amdpk_accel_set_conf(struct drm_device *dev, void *data, struct drm_file *fp)
+{
+	struct amdpk_user *user = fp->driver_priv;
+	struct amdpk_conf *conf = data;
+	int ret;
+
+	if (conf->qdepth == 0 || conf->qdepth > MAX_PK_REQS ||
+	    conf->qdepth > MAX_CQ_ENTRIES_ON_PAGE) {
+		dev_err(user->pkdev->dev, "Invalid qdepth: %d\n", conf->qdepth);
+		return -EINVAL;
+	}
+
+	if (user->configured) {
+		dev_err(user->pkdev->dev, "User already configured\n");
+		return -EEXIST;
+	}
+
+	ret = amdpk_accel_configure(user, conf);
+	if (ret)
+		return ret;
+
+	user->configured = true;
+	return 0;
+}
+
+static int amdpk_mmap_regs(struct vm_area_struct *vma)
+{
+	struct amdpk_user *user = vma->vm_private_data;
+	struct amdpk_dev *pkdev = user->pkdev;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	return io_remap_pfn_range(vma, vma->vm_start,
+				  (pkdev->regsphys + REG_CTL_BASE(user->qid)) >> PAGE_SHIFT,
+				  vma->vm_end - vma->vm_start, vma->vm_page_prot);
+}
+
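+/*
+ * Map one DMA-coherent chunk at the given byte offset inside the user's vma.
+ * dma_mmap_coherent() always maps from the start of the vma, so temporarily
+ * narrow the vma window to the chunk being mapped and restore it afterwards.
+ */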
+static int mmap_dmamem(struct vm_area_struct *vma, struct amdpk_dev *pkdev,
+		       void *addr, dma_addr_t phys, off_t offset, size_t sz)
+{
+	unsigned long vmstart = vma->vm_start;
+	unsigned long vmend = vma->vm_end;
+	unsigned long pgoff = vma->vm_pgoff;
+	int ret;
+
+	vma->vm_pgoff = 0;
+	vma->vm_start = vmstart + offset;
+	vma->vm_end = vma->vm_start + sz;
+	ret = dma_mmap_coherent(pkdev->dev, vma, addr, phys, sz);
+	vma->vm_pgoff = pgoff;
+	vma->vm_start = vmstart;
+	vma->vm_end = vmend;
+
+	return ret;
+}
+
+static int amdpk_mmap_mem(struct vm_area_struct *vma)
+{
+	struct amdpk_user *user = vma->vm_private_data;
+	struct amdpk_dev *pkdev = user->pkdev;
+	int pagemult, pagemultshift;
+	int requested_pages;
+	int qid = user->qid;
+	int ret, i;
+
+	if (!user->configured) {
+		dev_err(pkdev->dev, "configuration not found!");
+		return -ENODEV;
+	}
+	/* Mapping already done */
+	if (user->stmem) {
+		dev_err(pkdev->dev, "memory already mapped\n");
+		return -EINVAL;
+	}
+
+	requested_pages = vma_pages(vma);
+	/* The first page is reserved for the status page and the remaining
+	 * ones for the RQ, so the mmap must be at least 2 pages big.
+	 */
+	if (requested_pages < 2) {
+		dev_err(pkdev->dev, "Invalid request pages: %d\n", requested_pages);
+		return -EINVAL;
+	}
+	/* Store number of RQ pages. 1 page is reserved for status */
+	user->rq_pages = requested_pages - 1;
+	/* Request memory can span up to 4 hardware pages. All hardware pages
+	 * have the same size. If more than 4 OS pages are requested, every
+	 * hardware page covers the same multiple (pagemult) of OS pages, so
+	 * the requested size for the request queue must be a multiple of
+	 * pagemult.
+	 */
+	pagemult = (requested_pages - 1 + 3) / 4;
+	if ((requested_pages - 1) % pagemult != 0) {
+		dev_err(pkdev->dev, "requested pages: %d not multiple of page multiplier: %d\n",
+			requested_pages, pagemult);
+		return -EINVAL;
+	}
+	/* The hardware page size must be a power of 2, and as a consequence pagemult too. */
+	if ((pagemult & (pagemult - 1)) != 0) {
+		dev_err(pkdev->dev, "page multiplier: %d is not power of 2\n", pagemult);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < (requested_pages - 1) / pagemult; i++) {
+		user->rqmem[i] = dma_alloc_coherent(pkdev->dev, PAGE_SIZE * pagemult,
+						    &user->physrq[i], GFP_KERNEL);
+		if (!user->rqmem[i]) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+		pk_wrreg(pkdev->regs, REG_RQ_CFG_PAGE(qid, i), user->physrq[i]);
+	}
+
+	user->stmem = dma_alloc_coherent(pkdev->dev, PAGE_SIZE, &user->physst, GFP_KERNEL);
+	if (!user->stmem) {
+		ret = -ENOMEM;
+		goto fail;
+	}
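+
+	/* The status page is shared with userspace: the CQ worker stores the
+	 * 32-bit completion status of request "rid" at ((u32 *)stmem)[rid]
+	 * before signalling the corresponding eventfd.
+	 */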
+
+	/* Configure unused RQ pages with the start of the allocated shared
+	 * memory. Those pages should not be accessed, but if a (malicious)
+	 * user writes descriptors referencing those pages, it will not break
+	 * the rest of the system.
+	 */
+	for (i = (requested_pages - 1) / pagemult; i < MAX_RQMEM_PER_QUEUE; i++)
+		pk_wrreg(pkdev->regs, REG_RQ_CFG_PAGE(qid, i), user->physrq[0]);
+
+	/* Compute log2(pagemult) as popcount(pagemult - 1); pagemult is a
+	 * power of 2, so the two parallel bit-count steps below handle
+	 * multipliers up to 16.
+	 */
+	pagemultshift = pagemult - 1;
+	pagemultshift = (pagemultshift & 5) + ((pagemultshift & 0xa) >> 1);
+	pagemultshift = (pagemultshift & 3) + ((pagemultshift >> 2) & 3);
+	pk_wrreg(pkdev->regs, REG_RQ_CFG_PAGE_SIZE(qid), PAGE_SHIFT + pagemultshift);
+	pk_wrreg(pkdev->regs, REG_RQ_CFG_PAGES_WREN(qid),
+		 (1 << ((requested_pages - 1) / pagemult)));
+
+	ret = mmap_dmamem(vma, pkdev, user->stmem, user->physst, 0, PAGE_SIZE);
+	if (ret)
+		goto fail;
+	for (i = 0; i < (requested_pages - 1) / pagemult; i++) {
+		ret = mmap_dmamem(vma, pkdev, user->rqmem[i], user->physrq[i],
+				  (i * pagemult + 1) * PAGE_SIZE, PAGE_SIZE * pagemult);
+		if (ret)
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	amdpk_free_rqmem(pkdev, user);
+	if (user->stmem) {
+		dma_free_coherent(pkdev->dev, PAGE_SIZE, user->stmem, user->physst);
+		user->stmem = NULL;
+	}
+	return ret;
+}
+
+static int amdpk_accel_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+	struct drm_file *dfp = fp->private_data;
+	struct amdpk_user *user;
+	int ret = 0;
+
+	user = dfp->driver_priv;
+	if (vma->vm_end < vma->vm_start)
+		return -EINVAL;
+
+	vma->vm_private_data = user;
+
+	switch (vma->vm_pgoff) {
+	case AMDPK_MMAP_REGS:
+		ret = amdpk_mmap_regs(vma);
+		break;
+	case AMDPK_MMAP_MEM:
+		ret = amdpk_mmap_mem(vma);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
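+/* open() on the accel node: allocate a free hardware queue id for the new
+ * user and set up its per-queue work context. Hardware programming is
+ * deferred until the AMDPK_SET_CONF ioctl.
+ */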
+static int amdpk_open(struct drm_device *dev, struct drm_file *file)
+{
+	struct amdpk_work *pkwork = NULL;
+	struct amdpk_user *user = NULL;
+	struct amdpk_dev *pkdev;
+	int ret, qid;
+
+	pkdev = to_amdpk_dev(dev);
+	qid = ida_alloc_range(&pkdev->avail_queues, 0, pkdev->max_queues - 1, GFP_KERNEL);
+	if (qid < 0)
+		return -ENOSPC;
+
+	get_device(pkdev->dev);
+
+	user = kzalloc(sizeof(*user), GFP_KERNEL);
+	if (!user) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+	user->pkdev = pkdev;
+	user->qid = qid;
+	user->rq_entries = 0;
+	file->driver_priv = user;
+
+	pkwork = kzalloc(sizeof(*pkwork), GFP_KERNEL);
+	if (!pkwork) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+	pkwork->qid = qid;
+	pkwork->pkdev = pkdev;
+	pkwork->user = user;
+	pkdev->work[qid] = pkwork;
+
+	/* Publish the user only after its work context exists: the IRQ
+	 * handler dereferences pkdev->work[qid] for any published user.
+	 */
+	pkdev->users[qid] = user;
+
+	return 0;
+
+fail:
+	kfree(user);
+	kfree(pkwork);
+	ida_free(&pkdev->avail_queues, qid);
+	put_device(pkdev->dev);
+	return ret;
+}
+
+static void amdpk_postclose(struct drm_device *dev, struct drm_file *file)
+{
+	struct amdpk_user *user = file->driver_priv;
+	struct amdpk_dev *pkdev = user->pkdev;
+	char __iomem *regs = pkdev->regs;
+
+	/* Clear pkdev->users[qid] first, so that the completion interrupt
+	 * handler knows this user no longer exists and does not schedule any
+	 * completion work on the CQ worker kthread.
+	 */
+	pkdev->users[user->qid] = NULL;
+
+	if (user->configured) {
+		unsigned int attempts = 0;
+
+		/* Disable RQCQ pages to help the hardware finish potential
+		 * pending requests sooner.
+		 */
+		pk_wrreg(regs, REG_RQ_CFG_PAGE_SIZE(user->qid), 0);
+		pk_wrreg(regs, REG_RQ_CFG_PAGES_WREN(user->qid), 0);
+		pk_wrreg(regs, REG_CQ_CFG_SIZE(user->qid), 0);
+
+		/* The hardware has no flush mechanism for the requests pending
+		 * in the RQ. Instead, check periodically with
+		 * REG_CTL_PENDING_REQS whether the user still has requests in
+		 * flight. If the hardware never completes them, abort after
+		 * MAX_FLUSH_WAIT_ATTEMPTS and don't free the resources.
+		 */
+		while (pk_rdreg(regs, REG_CTL_BASE(user->qid) + REG_CTL_PENDING_REQS)) {
+			attempts++;
+			if (attempts > MAX_FLUSH_WAIT_ATTEMPTS) {
+				dev_err(pkdev->dev,
+					"Time out waiting for hw completions. Resources leaked.\n");
+				goto abort_cleanup;
+			}
+			msleep(20);
+		}
+
+		if (pkdev->work[user->qid]->cq_wq) {
+			kthread_cancel_work_sync(&pkdev->work[user->qid]->cq_work);
+			kthread_destroy_worker(pkdev->work[user->qid]->cq_wq);
+		}
+
+		amdpk_free_rqmem(pkdev, user);
+		if (user->cqmem) {
+			dma_free_coherent(pkdev->dev, PAGE_SIZE, user->cqmem, user->physcq);
+			user->cqmem = NULL;
+		}
+		if (user->stmem) {
+			dma_free_coherent(pkdev->dev, PAGE_SIZE, user->stmem, user->physst);
+			user->stmem = NULL;
+		}
+
+		atomic_add(user->rq_entries, &pkdev->avail_qdepth);
+	}
+	ida_free(&pkdev->avail_queues, user->qid);
+
+abort_cleanup:
+	put_device(pkdev->dev);
+	kfree(pkdev->work[user->qid]);
+	pkdev->work[user->qid] = NULL;
+	kfree(user);
+}
+
+static const struct drm_ioctl_desc amdpk_accel_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(AMDPK_GET_INFO, amdpk_accel_get_info, 0),
+	DRM_IOCTL_DEF_DRV(AMDPK_SET_CONF, amdpk_accel_set_conf, 0),
+};
+
+static const struct file_operations amdpk_accel_fops = {
+	.owner = THIS_MODULE,
+	.open = accel_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.compat_ioctl = drm_compat_ioctl,
+	.llseek = noop_llseek,
+	.mmap = amdpk_accel_mmap,
+};
+
+static const struct drm_driver amdpk_accel_driver = {
+	.driver_features = DRIVER_COMPUTE_ACCEL,
+
+	.name = "amdpk_accel_driver",
+	.desc = "AMD PKI Accelerator for versal-net",
+
+	.fops = &amdpk_accel_fops,
+	.open = amdpk_open,
+	.postclose = amdpk_postclose,
+
+	.ioctls = amdpk_accel_ioctls,
+	.num_ioctls = ARRAY_SIZE(amdpk_accel_ioctls),
+};
+
+static int amdpk_create_device(struct amdpk_dev *pkdev, struct device *dev, int irq)
+{
+	u64 qdepth, ver;
+	long magic;
+	int ret;
+
+	magic = pk_rdreg(pkdev->regs, REG_MAGIC);
+	if (magic != AMDPK_MAGIC) {
+		dev_err(dev, "Invalid magic constant %08lx !\n", magic);
+		return -ENODEV;
+	}
+	ver = pk_rdreg(pkdev->regs, REG_SEMVER);
+	if (AMDPK_SEMVER_MAJOR(ver) != 1 || AMDPK_SEMVER_MINOR(ver) < 1) {
+		dev_err(dev, "Hardware version (%d.%d) not supported.\n",
+			(int)AMDPK_SEMVER_MAJOR(ver), (int)AMDPK_SEMVER_MINOR(ver));
+		return -ENODEV;
+	}
+
+	/* Reset all accelerators and the hw scheduler */
+	pk_wrreg(pkdev->regs, REG_PK_GLOBAL_STATE, 0x1);
+	pk_wrreg(pkdev->regs, REG_PK_GLOBAL_STATE, 0x0);
+
+	pkdev->max_queues = (int)pk_rdreg(pkdev->regs, REG_CFG_REQ_QUEUES_CNT);
+	qdepth = pk_rdreg(pkdev->regs, REG_CFG_MAX_PENDING_REQ);
+	atomic_set(&pkdev->avail_qdepth, qdepth);
+
+	pk_wrreg(pkdev->regs, REG_IRQ_ENABLE, 0);
+	pk_wrreg(pkdev->regs, REG_PK_IRQ_RESET, ~0);
+	pk_wrreg(pkdev->regs, REG_IRQ_ENABLE, (1 << pkdev->max_queues) - 1);
+
+	ret = devm_request_irq(dev, irq, amdpk_cq_irq, 0, "amdpk", pkdev);
+	if (ret)
+		return ret;
+
+	ida_init(&pkdev->avail_queues);
+
+	return 0;
+}
+
+static void amdpk_remove_device(struct amdpk_dev *pkdev)
+{
+	drm_dev_unplug(&pkdev->ddev);
+	pk_wrreg(pkdev->regs, REG_IRQ_ENABLE, 0);
+	ida_destroy(&pkdev->avail_queues);
+}
+
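+/* Platform probe: map the PKI register block, register the accel device and
+ * bring the hardware up (magic/version check, engine reset, IRQ setup).
+ */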
+static int amdpk_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct amdpk_dev *pkdev;
+	struct resource *memres;
+	int irq, ret;
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (ret < 0)
+		return ret;
+
+	pkdev = devm_drm_dev_alloc(dev, &amdpk_accel_driver, typeof(*pkdev), ddev);
+	if (IS_ERR(pkdev))
+		return PTR_ERR(pkdev);
+	pkdev->dev = dev;
+
+	memres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	pkdev->regs = devm_ioremap_resource(dev, memres);
+	if (IS_ERR(pkdev->regs))
+		return PTR_ERR(pkdev->regs);
+	pkdev->regsphys = memres->start;
+	platform_set_drvdata(pdev, pkdev);
+
+	if (platform_irq_count(pdev) != 1)
+		return -ENODEV;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return -ENODEV;
+
+	ret = drm_dev_register(&pkdev->ddev, 0);
+	if (ret) {
+		dev_err(&pdev->dev, "DRM register failed, ret %d", ret);
+		return ret;
+	}
+
+	return amdpk_create_device(pkdev, dev, irq);
+}
+
+static void amdpk_remove(struct platform_device *pdev)
+{
+	struct amdpk_dev *pkdev = platform_get_drvdata(pdev);
+
+	amdpk_remove_device(pkdev);
+}
+
+static void amdpk_shutdown(struct platform_device *pdev)
+{
+	amdpk_remove(pdev);
+}
+
+static const struct of_device_id amdpk_match_table[] = {
+	{ .compatible = "amd,versal-net-pki" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, amdpk_match_table);
+
+static struct platform_driver amdpk_pdrv = {
+	.probe = amdpk_probe,
+	.remove = amdpk_remove,
+	.shutdown = amdpk_shutdown,
+	.driver = {
+		.name = DRIVER_NAME,
+		.of_match_table = amdpk_match_table,
+	},
+};
+
+static int __init amdpk_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&amdpk_pdrv);
+	if (ret) {
+		pr_err("can't register platform driver\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void __exit amdpk_exit(void)
+{
+	platform_driver_unregister(&amdpk_pdrv);
+}
+
+module_init(amdpk_init);
+module_exit(amdpk_exit);
+
+MODULE_AUTHOR("AMD");
+MODULE_DESCRIPTION("AMD PKI accelerator for versal-net");
+MODULE_LICENSE("GPL");
diff --git a/drivers/accel/amdpk/amdpk_drv.h b/drivers/accel/amdpk/amdpk_drv.h
new file mode 100644
index 000000000000..c14c10db5d97
--- /dev/null
+++ b/drivers/accel/amdpk/amdpk_drv.h
@@ -0,0 +1,271 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018-2021 Silex Insight sa
+ * Copyright (c) 2018-2021 Beerten Engineering scs
+ * Copyright (c) 2025 Advanced Micro Devices, Inc.
+ */
+
+#ifndef __AMDPK_DRV_H__
+#define __AMDPK_DRV_H__
+
+#include <linux/types.h>
+#include <linux/kthread.h>
+#include <linux/io.h>
+#include <drm/drm_drv.h>
+#include <uapi/drm/amdpk.h>
+
+/* Magic number in the AMD PKI device, required to validate hardware access. */
+#define AMDPK_MAGIC 0x5113C50C
+
+/* Contains the magic number 0x5113C50C.
+ * Used to validate access to the hardware registers.
+ */
+#define REG_MAGIC (0x00)
+
+/* Contains the version of the hardware interface as semver.
+ * The semantic version: major 8 bits, minor 8 bits in little endian order.
+ */
+#define REG_SEMVER (0x08)
+
+/* The number of request queues available in the hardware. */
+#define REG_CFG_REQ_QUEUES_CNT 0x10
+
+/* The maximum number of pending requests from all request queues combined. */
+#define REG_CFG_MAX_PENDING_REQ 0x18
+
+/* The maximum number of pending requests in a single request queue. */
+#define REG_CFG_MAX_REQ_QUEUE_ENTRIES 0x0020
+
+/* The first 16 bits give the amount of PK core instances with 64 multipliers.
+ * The next 16 bits give the amount of PK core instances with 256 multipliers.
+ */
+#define REG_CFG_PK_INST 0x28
+
+/* Writing 0x1 puts all pkcore accelerators and the scheduler in reset.
+ * Writing 0x0 makes all pkcore accelerators and the scheduler leave reset
+ * and become operational.
+ */
+#define REG_PK_GLOBAL_STATE 0x38
+
+/* The semantic version: major 8 bits, minor 8 bits,
+ * scm id 16 bits in little endian order.
+ */
+#define REG_HW_VERSION (0x40)
+
+/* Bitmask of which CQ interrupts are raised. */
+#define REG_PK_IRQ_STATUS 0x88
+
+/* Bitmask of which CQ may trigger interrupts. */
+#define REG_IRQ_ENABLE 0x90
+
+/* Bitmask of CQ interrupts to reset. */
+#define REG_PK_IRQ_RESET 0xA0
+
+/* Bus address of the page p for the given request queue.
+ * The address must be aligned on the page size.
+ */
+#define REG_RQ_CFG_PAGE(qid, pageidx) (0x00100 + (qid) * 0x80 + (pageidx) * 0x8)
+
+/* Size in bytes of the pages represented as a power of 2.
+ *
+ * Allowed values :
+ * ================ ==============
+ * register value   size in bytes
+ * ================ ==============
+ * 7                128
+ * 8                256
+ * 9                512
+ * 10               1024
+ * 11               2048
+ * 12               4096
+ * 13               8192
+ * 14               16384
+ * 15               32768
+ * 16               65536
+ * ================ ==============
+ */
+#define REG_RQ_CFG_PAGE_SIZE(qid) (0x00120 + (qid) * 0x80)
+
+/* Index of the associated completion queue. */
+#define REG_RQ_CFG_CQID(qid) (0x00128 + (qid) * 0x80)
+
+/* Bit field of pages where descriptors can write to.
+ * When a bit is 1, a descriptor can write to the corresponding page.
+ */
+#define REG_RQ_CFG_PAGES_WREN(qid) (0x00138 + (qid) * 0x80)
+
+/* Maximum number of entries which can be written into this request queue. */
+#define REG_RQ_CFG_DEPTH(qid) (0x00140 + (qid) * 0x80)
+
+/* Bus address of the ring base of completion queue n.
+ * The address must be aligned on 64 bits.
+ */
+#define REG_CQ_CFG_ADDR(qid) (0x1100 + (qid) * 0x80)
+
+/* CQ notification trigger position. */
+#define REG_CTL_CQ_NTFY(qid) (0x2028 + (qid) * 0x1000)
+
+/* Size in bytes of the completion ring represented as a power of 2.
+ *
+ * Allowed sizes :
+ * ================ ============== ==============
+ * register value   size in bytes  max entries
+ * ================ ============== ==============
+ * 7                128            16
+ * 8                256            32
+ * 9                512            64
+ * 10               1024           128
+ * 11               2048           256
+ * 12               4096           512
+ * 13               8192           1024
+ * 14               16384          2048
+ * ================ ============== ==============
+ */
+#define REG_CQ_CFG_SIZE(qid) (0x1108 + (qid) * 0x80)
+
+/* Interrupt number for this completion queue. */
+#define REG_CQ_CFG_IRQ_NR(qid) (0x1110 + (qid) * 0x80)
+
+/* Control registers base address for the given request completion queue pair. */
+#define REG_CTL_BASE(qid) (0x2000 + (qid) * 0x1000)
+
+/* Count of how many requests are queued at a given time for this RQCQ.
+ * When this count reaches 0, the resources of the request and
+ * completion queues can be deleted.
+ */
+#define REG_CTL_PENDING_REQS 0x18
+
+/* Busy cycle count register address. */
+#define REG_PK_BUSY_CYCLES 0x2108
+/* Idle cycle count register address. */
+#define REG_PK_IDLE_CYCLES 0x2110
+
+/* Hardware interface versions. */
+#define AMDPK_SEMVER_MAJOR(v) (((v) >> 24) & 0xff)
+#define AMDPK_SEMVER_MINOR(v) (((v) >> 16) & 0xff)
+#define AMDPK_SEMVER_PATCH(v) ((v) & 0xffff)
+
+/* Hardware implementation versions. */
+#define AMDPK_HWVER_MAJOR(v) (((v) >> 24) & 0xff)
+#define AMDPK_HWVER_MINOR(v) (((v) >> 16) & 0xff)
+#define AMDPK_HWVER_SVN(v) ((v) & 0xffff)
+
+/* Maximum number of queues supported by the driver. */
+#define MAX_QUEUES 4
+
+/* Number of RQ memory addresses for each queue. */
+#define MAX_RQMEM_PER_QUEUE 4
+
+/* Wait attempts for HW to flush all requests before close. */
+#define MAX_FLUSH_WAIT_ATTEMPTS 500
+
+/* Bit 0 (0x1) is the generation bit. */
+#define CQ_GENERATION_BIT BIT(0)
+
+/* Bit 1 (0x2) of the completion word; treated as a completion error by the driver. */
+#define CQ_COMPLETION_BIT BIT(1)
+
+/* Maximal value of rq_entries is 512. There is 1 CQ of 4K bytes.
+ * Each completion status is 8 bytes, so only 4096 / 8 = 512 entries
+ * are possible at any time.
+ */
+#define MAX_CQ_ENTRIES_ON_PAGE (PAGE_SIZE / 8)
+
+/* Forward declarations */
+struct amdpk_dev;
+struct amdpk_user;
+
+/* structure to hold completion queue information */
+struct amdpk_cq {
+	/* PKI device */
+	struct amdpk_dev *pkdev;
+	/* Base address of the completion queue */
+	u32 *base;
+	/* tail representing last completion */
+	unsigned int tail;
+	/* generation bit which toggles as per the device */
+	unsigned int generation;
+	/* size code as configured in REG_RQ_CFG_PAGE_SIZE */
+	u16 szcode;
+};
+
+/* represents PKI work context */
+struct amdpk_work {
+	/* PKI device */
+	struct amdpk_dev *pkdev;
+	/* PKI user */
+	struct amdpk_user *user;
+	/* Completion queue */
+	struct amdpk_cq pk_cq;
+	/* Kthread work associated with the PKI work */
+	struct kthread_work cq_work;
+	/* Kthread worker to handle completions */
+	struct kthread_worker *cq_wq;
+	/* Associated queue ID */
+	u16 qid;
+};
+
+/* AMD PKI device */
+struct amdpk_dev {
+	/* DRM device associated with PKI device */
+	struct drm_device ddev;
+	/* Core device */
+	struct device *dev;
+	/* PKI register space address */
+	char __iomem *regs;
+	/* PKI register space physical address */
+	resource_size_t regsphys;
+	/* Maximum queues supported by device */
+	u16 max_queues;
+	/* Allocator for free queue ids */
+	struct ida avail_queues;
+	/* Device-wide available request queue depth */
+	atomic_t avail_qdepth;
+	/* List of all the AMD PKI users */
+	struct amdpk_user *users[MAX_QUEUES];
+	/* PKI work for each queue */
+	struct amdpk_work *work[MAX_QUEUES];
+};
+
+/* AMD PKI user */
+struct amdpk_user {
+	/* PKI device */
+	struct amdpk_dev *pkdev;
+	/* Indicates if user has been configured */
+	bool configured;
+	/* Queue ID allocated for the user */
+	u16 qid;
+	/* Number of pages allocated on request queue */
+	u16 rq_pages;
+	/* RQ entries reserved for this user */
+	size_t rq_entries;
+	/* DMA address for RQ pages */
+	dma_addr_t physrq[MAX_RQMEM_PER_QUEUE];
+	/* RQ pages addresses */
+	u8 *rqmem[MAX_RQMEM_PER_QUEUE];
+	/* DMA address for CQ page */
+	dma_addr_t physcq;
+	/* CQ page address */
+	u8 *cqmem;
+	/* DMA address for status page */
+	dma_addr_t physst;
+	/* Status page address */
+	u8 *stmem;
+	/* Eventfd context for each request */
+	struct eventfd_ctx *evfd_ctx[MAX_PK_REQS];
+};
+
+#define to_amdpk_dev(dev) container_of(dev, struct amdpk_dev, ddev)
+#define to_amdpk_work(work) container_of(work, struct amdpk_work, cq_work)
+
+static inline void pk_wrreg(char __iomem *regs, int addr, u64 val)
+{
+	iowrite64(val, regs + addr);
+}
+
+static inline u64 pk_rdreg(char __iomem *regs, int addr)
+{
+	return ioread64(regs + addr);
+}
+
+#endif /* __AMDPK_DRV_H__ */
diff --git a/include/uapi/drm/amdpk.h b/include/uapi/drm/amdpk.h
new file mode 100644
index 000000000000..e5e18fdbc2c4
--- /dev/null
+++ b/include/uapi/drm/amdpk.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2025 Advanced Micro Devices, Inc.
+ */
+
+#ifndef __AMDPK_H__
+#define __AMDPK_H__
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define MAX_PK_REQS 256
+
+struct amdpk_info {
+	/** maximum available queue depth */
+	unsigned int avail_qdepth;
+};
+
+struct amdpk_conf {
+	/** queue depth to configure */
+	unsigned int qdepth;
+	/** eventfds associated with the descriptors */
+	int eventfd[MAX_PK_REQS];
+};
+
+/* IOCTL */
+#define DRM_AMDPK_GET_INFO 0x0
+#define DRM_AMDPK_SET_CONF 0x1
+
+#define DRM_IOCTL_AMDPK_GET_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDPK_GET_INFO, \
+					  struct amdpk_info)
+#define DRM_IOCTL_AMDPK_SET_CONF DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDPK_SET_CONF, \
+					  struct amdpk_conf)
+
+/* MMAP offsets (selected via the mmap page offset) */
+#define AMDPK_MMAP_REGS 0
+#define AMDPK_MMAP_MEM 1
+
+/* Completion Status */
+#define CQ_STATUS_INVALID 0x0
+#define CQ_STATUS_VALID 0x80000000
+#define CQ_COMPLETION_ERROR 0x40000000
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* __AMDPK_H__ */
-- 
2.34.1