Both patches are queued for 6.14.2:
https://lore.kernel.org/all/20250409115934.968141...@linuxfoundation.org/

The issue was reported in our community forum:
https://forum.proxmox.com/threads/.164497/post-762617
As we have access to a server where we could reproduce the issue
(crash+loop, before the system was up [0]), I tested a kernel with those
2 patches applied - and the system booted successfully.

FWIW: I tried building with the original series as well (containing a
removal of some PCI-ids), and it also resolved the issue:
https://lore.kernel.org/all/20250203162511.911946-1-basavaraj.nati...@amd.com/

[0] before proxmox-boot-cleanup.service (so pinning with --next-boot did
not help)

Signed-off-by: Stoiko Ivanov <s.iva...@proxmox.com>
---
 ...-Use-the-MSI-count-and-its-correspon.patch |  31 +++
 ...Utilize-the-AE4DMA-engine-s-multi-qu.patch | 201 ++++++++++++++++++
 2 files changed, 232 insertions(+)
 create mode 100644 patches/kernel/0014-dmaengine-ae4dma-Use-the-MSI-count-and-its-correspon.patch
 create mode 100644 patches/kernel/0015-dmaengine-ptdma-Utilize-the-AE4DMA-engine-s-multi-qu.patch

diff --git a/patches/kernel/0014-dmaengine-ae4dma-Use-the-MSI-count-and-its-correspon.patch b/patches/kernel/0014-dmaengine-ae4dma-Use-the-MSI-count-and-its-correspon.patch
new file mode 100644
index 000000000000..a31676273a98
--- /dev/null
+++ b/patches/kernel/0014-dmaengine-ae4dma-Use-the-MSI-count-and-its-correspon.patch
@@ -0,0 +1,31 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Basavaraj Natikar <basavaraj.nati...@amd.com>
+Date: Mon, 3 Feb 2025 21:55:10 +0530
+Subject: [PATCH] dmaengine: ae4dma: Use the MSI count and its corresponding
+ IRQ number
+
+Instead of using the defined maximum hardware queue, which can lead to
+incorrect values if the counts mismatch, use the exact supported MSI
+count and its corresponding IRQ number.
+
+Fixes: 90a30e268d9b ("dmaengine: ae4dma: Add AMD ae4dma controller driver")
+Signed-off-by: Basavaraj Natikar <basavaraj.nati...@amd.com>
+---
+ drivers/dma/amd/ae4dma/ae4dma-pci.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c b/drivers/dma/amd/ae4dma/ae4dma-pci.c
+index aad0dc4294a3945245737978c077eecf740ccb3a..587c5a10c1a8b2dbb925c31af86b1d0b23438b45 100644
+--- a/drivers/dma/amd/ae4dma/ae4dma-pci.c
++++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c
+@@ -46,8 +46,8 @@ static int ae4_get_irqs(struct ae4_device *ae4)
+
+         } else {
+                 ae4_msix->msix_count = ret;
+-                for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
+-                        ae4->ae4_irq[i] = ae4_msix->msix_entry[i].vector;
++                for (i = 0; i < ae4_msix->msix_count; i++)
++                        ae4->ae4_irq[i] = pci_irq_vector(pdev, i);
+         }
+
+         return ret;
diff --git a/patches/kernel/0015-dmaengine-ptdma-Utilize-the-AE4DMA-engine-s-multi-qu.patch b/patches/kernel/0015-dmaengine-ptdma-Utilize-the-AE4DMA-engine-s-multi-qu.patch
new file mode 100644
index 000000000000..c59d69738779
--- /dev/null
+++ b/patches/kernel/0015-dmaengine-ptdma-Utilize-the-AE4DMA-engine-s-multi-qu.patch
@@ -0,0 +1,201 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Basavaraj Natikar <basavaraj.nati...@amd.com>
+Date: Mon, 3 Feb 2025 21:55:11 +0530
+Subject: [PATCH] dmaengine: ptdma: Utilize the AE4DMA engine's multi-queue
+ functionality
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+As AE4DMA offers multi-channel functionality compared to PTDMA’s single
+queue, utilize multi-queue, which supports higher speeds than PTDMA, to
+achieve higher performance using the AE4DMA workqueue based mechanism.
+
+Fixes: 69a47b16a51b ("dmaengine: ptdma: Extend ptdma to support multi-channel and version")
+Signed-off-by: Basavaraj Natikar <basavaraj.nati...@amd.com>
+---
+ drivers/dma/amd/ae4dma/ae4dma.h         |  2 +
+ drivers/dma/amd/ptdma/ptdma-dmaengine.c | 90 ++++++++++++++++++++++++-
+ 2 files changed, 89 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/dma/amd/ae4dma/ae4dma.h b/drivers/dma/amd/ae4dma/ae4dma.h
+index 265c5d4360080d6a0cc77f2bab507fde761d5461..57f6048726bb68da03e145d0c69f4bdcd4012c6f 100644
+--- a/drivers/dma/amd/ae4dma/ae4dma.h
++++ b/drivers/dma/amd/ae4dma/ae4dma.h
+@@ -37,6 +37,8 @@
+ #define AE4_DMA_VERSION 4
+ #define CMD_AE4_DESC_DW0_VAL 2
+
++#define AE4_TIME_OUT 5000
++
+ struct ae4_msix {
+         int msix_count;
+         struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
+diff --git a/drivers/dma/amd/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
+index 35c84ec9608b4fd119972e3cd9abedf818dff743..715ac3ae067b857830db85e170787e30f3ae6b1d 100644
+--- a/drivers/dma/amd/ptdma/ptdma-dmaengine.c
++++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
+@@ -198,8 +198,10 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
+ {
+         struct dma_async_tx_descriptor *tx_desc;
+         struct virt_dma_desc *vd;
++        struct pt_device *pt;
+         unsigned long flags;
+
++        pt = chan->pt;
+         /* Loop over descriptors until one is found with commands */
+         do {
+                 if (desc) {
+@@ -217,7 +219,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
+
+                 spin_lock_irqsave(&chan->vc.lock, flags);
+
+-                if (desc) {
++                if (pt->ver != AE4_DMA_VERSION && desc) {
+                         if (desc->status != DMA_COMPLETE) {
+                                 if (desc->status != DMA_ERROR)
+                                         desc->status = DMA_COMPLETE;
+@@ -235,7 +237,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
+
+         spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+-        if (tx_desc) {
++        if (pt->ver != AE4_DMA_VERSION && tx_desc) {
+                 dmaengine_desc_get_callback_invoke(tx_desc, NULL);
+                 dma_run_dependencies(tx_desc);
+                 vchan_vdesc_fini(vd);
+@@ -245,11 +247,25 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
+         return NULL;
+ }
+
++static inline bool ae4_core_queue_full(struct pt_cmd_queue *cmd_q)
++{
++        u32 front_wi = readl(cmd_q->reg_control + AE4_WR_IDX_OFF);
++        u32 rear_ri = readl(cmd_q->reg_control + AE4_RD_IDX_OFF);
++
++        if (((MAX_CMD_QLEN + front_wi - rear_ri) % MAX_CMD_QLEN) >= (MAX_CMD_QLEN - 1))
++                return true;
++
++        return false;
++}
++
+ static void pt_cmd_callback(void *data, int err)
+ {
+         struct pt_dma_desc *desc = data;
++        struct ae4_cmd_queue *ae4cmd_q;
+         struct dma_chan *dma_chan;
+         struct pt_dma_chan *chan;
++        struct ae4_device *ae4;
++        struct pt_device *pt;
+         int ret;
+
+         if (err == -EINPROGRESS)
+@@ -257,11 +273,32 @@ static void pt_cmd_callback(void *data, int err)
+
+         dma_chan = desc->vd.tx.chan;
+         chan = to_pt_chan(dma_chan);
++        pt = chan->pt;
+
+         if (err)
+                 desc->status = DMA_ERROR;
+
+         while (true) {
++                if (pt->ver == AE4_DMA_VERSION) {
++                        ae4 = container_of(pt, struct ae4_device, pt);
++                        ae4cmd_q = &ae4->ae4cmd_q[chan->id];
++
++                        if (ae4cmd_q->q_cmd_count >= (CMD_Q_LEN - 1) ||
++                            ae4_core_queue_full(&ae4cmd_q->cmd_q)) {
++                                wake_up(&ae4cmd_q->q_w);
++
++                                if (wait_for_completion_timeout(&ae4cmd_q->cmp,
++                                                msecs_to_jiffies(AE4_TIME_OUT))
++                                                == 0) {
++                                        dev_err(pt->dev, "TIMEOUT %d:\n", ae4cmd_q->id);
++                                        break;
++                                }
++
++                                reinit_completion(&ae4cmd_q->cmp);
++                                continue;
++                        }
++                }
++
+                 /* Check for DMA descriptor completion */
+                 desc = pt_handle_active_desc(chan, desc);
+
+@@ -296,6 +333,49 @@ static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan,
+         return desc;
+ }
+
++static void pt_cmd_callback_work(void *data, int err)
++{
++        struct dma_async_tx_descriptor *tx_desc;
++        struct pt_dma_desc *desc = data;
++        struct dma_chan *dma_chan;
++        struct virt_dma_desc *vd;
++        struct pt_dma_chan *chan;
++        unsigned long flags;
++
++        dma_chan = desc->vd.tx.chan;
++        chan = to_pt_chan(dma_chan);
++
++        if (err == -EINPROGRESS)
++                return;
++
++        tx_desc = &desc->vd.tx;
++        vd = &desc->vd;
++
++        if (err)
++                desc->status = DMA_ERROR;
++
++        spin_lock_irqsave(&chan->vc.lock, flags);
++        if (desc) {
++                if (desc->status != DMA_COMPLETE) {
++                        if (desc->status != DMA_ERROR)
++                                desc->status = DMA_COMPLETE;
++
++                        dma_cookie_complete(tx_desc);
++                        dma_descriptor_unmap(tx_desc);
++                } else {
++                        tx_desc = NULL;
++                }
++        }
++        spin_unlock_irqrestore(&chan->vc.lock, flags);
++
++        if (tx_desc) {
++                dmaengine_desc_get_callback_invoke(tx_desc, NULL);
++                dma_run_dependencies(tx_desc);
++                list_del(&desc->vd.node);
++                vchan_vdesc_fini(vd);
++        }
++}
++
+ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
+                                           dma_addr_t dst,
+                                           dma_addr_t src,
+@@ -327,6 +407,7 @@ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
+         desc->len = len;
+
+         if (pt->ver == AE4_DMA_VERSION) {
++                pt_cmd->pt_cmd_callback = pt_cmd_callback_work;
+                 ae4 = container_of(pt, struct ae4_device, pt);
+                 ae4cmd_q = &ae4->ae4cmd_q[chan->id];
+                 mutex_lock(&ae4cmd_q->cmd_lock);
+@@ -367,13 +448,16 @@ static void pt_issue_pending(struct dma_chan *dma_chan)
+ {
+         struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+         struct pt_dma_desc *desc;
++        struct pt_device *pt;
+         unsigned long flags;
+         bool engine_is_idle = true;
+
++        pt = chan->pt;
++
+         spin_lock_irqsave(&chan->vc.lock, flags);
+
+         desc = pt_next_dma_desc(chan);
+-        if (desc)
++        if (desc && pt->ver != AE4_DMA_VERSION)
+                 engine_is_idle = false;
+
+         vchan_issue_pending(&chan->vc);
-- 
2.39.5

_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
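
P.S. for readers following the second patch: the flow-control check it adds in
ae4_core_queue_full() is standard ring-buffer arithmetic. The hardware exposes
a write (producer) index and a read (consumer) index; occupancy is their
difference modulo the queue length, and one slot is kept unused so a full ring
can be distinguished from an empty one. Below is a minimal standalone sketch
of that arithmetic under assumed names - QLEN, ring_occupancy() and
ring_full() are illustrative stand-ins for this example only, not the driver's
MAX_CMD_QLEN or its helpers, which read the indices from MMIO registers rather
than taking them as parameters.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed ring size for illustration; stands in for MAX_CMD_QLEN. */
#define QLEN 32u

/* Entries currently in flight. The "+ QLEN" keeps the subtraction
 * non-negative before the modulo when the write index has wrapped. */
static uint32_t ring_occupancy(uint32_t write_idx, uint32_t read_idx)
{
        return (QLEN + write_idx - read_idx) % QLEN;
}

/* Mirrors the patch's ">= (MAX_CMD_QLEN - 1)" condition: with modulo
 * arithmetic, QLEN - 1 is the largest distinguishable occupancy, so
 * the ring is treated as full one slot early. */
static bool ring_full(uint32_t write_idx, uint32_t read_idx)
{
        return ring_occupancy(write_idx, read_idx) >= QLEN - 1u;
}

int main(void)
{
        /* write index wrapped past the read index: 5 entries in flight */
        printf("occupancy: %u\n", (unsigned)ring_occupancy(2, 29));
        /* 31 entries outstanding == QLEN - 1 -> reported as full */
        printf("full: %d\n", ring_full(2, 3));
        return 0;
}

This is why pt_cmd_callback() wakes the queue worker and waits on a completion
whenever either the software counter (q_cmd_count) or the hardware indices
report the queue full, instead of submitting a descriptor the engine has no
slot for.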