Both patches are queued for 6.14.2:
https://lore.kernel.org/all/20250409115934.968141...@linuxfoundation.org/

The issue was reported in our community forum:
https://forum.proxmox.com/threads/.164497/post-762617

As we have access to a server where we could reproduce the issue
(crash + boot loop before the system was fully up [0]), I tested a
kernel with those two patches applied - and the system booted
successfully.

FWIW: I also tried building with the original version of the series
(which additionally removes some PCI IDs), and it resolved the issue as
well:
https://lore.kernel.org/all/20250203162511.911946-1-basavaraj.nati...@amd.com/

[0] the crash happened before proxmox-boot-cleanup.service ran, so
pinning a kernel with --next-boot did not help

Signed-off-by: Stoiko Ivanov <s.iva...@proxmox.com>
---
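For reviewers who don't want to wade through the full diff: the first
patch boils down to bounding the IRQ-mapping loop by the number of MSI-X
vectors the kernel actually granted, and looking each one up via
pci_irq_vector(), instead of iterating up to the compile-time queue
maximum and reading past the allocated msix_entry[] array. A minimal,
self-contained sketch of that pattern (the function name, irqs array and
queue maximum are placeholders, not the driver's literal code):

#include <linux/pci.h>

#define MAX_HW_QUEUES_SKETCH 16	/* placeholder; the driver defines its own maximum */

static int map_queue_irqs_sketch(struct pci_dev *pdev, int *irqs)
{
	int i, nvec;

	/* the kernel may grant fewer vectors than requested ... */
	nvec = pci_alloc_irq_vectors(pdev, 1, MAX_HW_QUEUES_SKETCH, PCI_IRQ_MSIX);
	if (nvec < 0)
		return nvec;

	/* ... so only that many per-queue IRQs may be looked up */
	for (i = 0; i < nvec; i++)
		irqs[i] = pci_irq_vector(pdev, i);

	return nvec;
}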
 ...-Use-the-MSI-count-and-its-correspon.patch |  31 +++
 ...Utilize-the-AE4DMA-engine-s-multi-qu.patch | 201 ++++++++++++++++++
 2 files changed, 232 insertions(+)
 create mode 100644 patches/kernel/0014-dmaengine-ae4dma-Use-the-MSI-count-and-its-correspon.patch
 create mode 100644 patches/kernel/0015-dmaengine-ptdma-Utilize-the-AE4DMA-engine-s-multi-qu.patch
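
The second patch's central addition is the ring-occupancy test in
ae4_core_queue_full(): with a hardware write index (front_wi) and read
index (rear_ri) on a ring of MAX_CMD_QLEN slots, the number of in-flight
descriptors is the wrapped difference of the two, and the queue is
treated as full one slot early so a completely full ring stays
distinguishable from an empty one. The same arithmetic in isolation, as
a hypothetical userspace sketch (the ring depth is a placeholder, not
the driver's value):

#include <stdbool.h>
#include <stdint.h>

#define CMD_QLEN_SKETCH 32	/* placeholder ring depth */

static bool queue_full_sketch(uint32_t front_wi, uint32_t rear_ri)
{
	/* wrapped occupancy: slots between read and write index */
	uint32_t used = (CMD_QLEN_SKETCH + front_wi - rear_ri) % CMD_QLEN_SKETCH;

	/* report full one slot early to keep full/empty unambiguous */
	return used >= CMD_QLEN_SKETCH - 1;
}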

diff --git a/patches/kernel/0014-dmaengine-ae4dma-Use-the-MSI-count-and-its-correspon.patch b/patches/kernel/0014-dmaengine-ae4dma-Use-the-MSI-count-and-its-correspon.patch
new file mode 100644
index 000000000000..a31676273a98
--- /dev/null
+++ b/patches/kernel/0014-dmaengine-ae4dma-Use-the-MSI-count-and-its-correspon.patch
@@ -0,0 +1,31 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Basavaraj Natikar <basavaraj.nati...@amd.com>
+Date: Mon, 3 Feb 2025 21:55:10 +0530
+Subject: [PATCH] dmaengine: ae4dma: Use the MSI count and its corresponding
+ IRQ number
+
+Instead of using the defined maximum hardware queue, which can lead to
+incorrect values if the counts mismatch, use the exact supported MSI
+count and its corresponding IRQ number.
+
+Fixes: 90a30e268d9b ("dmaengine: ae4dma: Add AMD ae4dma controller driver")
+Signed-off-by: Basavaraj Natikar <basavaraj.nati...@amd.com>
+---
+ drivers/dma/amd/ae4dma/ae4dma-pci.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c b/drivers/dma/amd/ae4dma/ae4dma-pci.c
+index aad0dc4294a3945245737978c077eecf740ccb3a..587c5a10c1a8b2dbb925c31af86b1d0b23438b45 100644
+--- a/drivers/dma/amd/ae4dma/ae4dma-pci.c
++++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c
+@@ -46,8 +46,8 @@ static int ae4_get_irqs(struct ae4_device *ae4)
+ 
+       } else {
+               ae4_msix->msix_count = ret;
+-              for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
+-                      ae4->ae4_irq[i] = ae4_msix->msix_entry[i].vector;
++              for (i = 0; i < ae4_msix->msix_count; i++)
++                      ae4->ae4_irq[i] = pci_irq_vector(pdev, i);
+       }
+ 
+       return ret;
diff --git a/patches/kernel/0015-dmaengine-ptdma-Utilize-the-AE4DMA-engine-s-multi-qu.patch b/patches/kernel/0015-dmaengine-ptdma-Utilize-the-AE4DMA-engine-s-multi-qu.patch
new file mode 100644
index 000000000000..c59d69738779
--- /dev/null
+++ b/patches/kernel/0015-dmaengine-ptdma-Utilize-the-AE4DMA-engine-s-multi-qu.patch
@@ -0,0 +1,201 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Basavaraj Natikar <basavaraj.nati...@amd.com>
+Date: Mon, 3 Feb 2025 21:55:11 +0530
+Subject: [PATCH] dmaengine: ptdma: Utilize the AE4DMA engine's multi-queue
+ functionality
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+As AE4DMA offers multi-channel functionality compared to PTDMA’s single
+queue, utilize multi-queue, which supports higher speeds than PTDMA, to
+achieve higher performance using the AE4DMA workqueue based mechanism.
+
+Fixes: 69a47b16a51b ("dmaengine: ptdma: Extend ptdma to support multi-channel and version")
+Signed-off-by: Basavaraj Natikar <basavaraj.nati...@amd.com>
+---
+ drivers/dma/amd/ae4dma/ae4dma.h         |  2 +
+ drivers/dma/amd/ptdma/ptdma-dmaengine.c | 90 ++++++++++++++++++++++++-
+ 2 files changed, 89 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/dma/amd/ae4dma/ae4dma.h b/drivers/dma/amd/ae4dma/ae4dma.h
+index 265c5d4360080d6a0cc77f2bab507fde761d5461..57f6048726bb68da03e145d0c69f4bdcd4012c6f 100644
+--- a/drivers/dma/amd/ae4dma/ae4dma.h
++++ b/drivers/dma/amd/ae4dma/ae4dma.h
+@@ -37,6 +37,8 @@
+ #define AE4_DMA_VERSION                       4
+ #define CMD_AE4_DESC_DW0_VAL          2
+ 
++#define AE4_TIME_OUT                  5000
++
+ struct ae4_msix {
+       int msix_count;
+       struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
+diff --git a/drivers/dma/amd/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
+index 35c84ec9608b4fd119972e3cd9abedf818dff743..715ac3ae067b857830db85e170787e30f3ae6b1d 100644
+--- a/drivers/dma/amd/ptdma/ptdma-dmaengine.c
++++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
+@@ -198,8 +198,10 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
+ {
+       struct dma_async_tx_descriptor *tx_desc;
+       struct virt_dma_desc *vd;
++      struct pt_device *pt;
+       unsigned long flags;
+ 
++      pt = chan->pt;
+       /* Loop over descriptors until one is found with commands */
+       do {
+               if (desc) {
+@@ -217,7 +219,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
+ 
+               spin_lock_irqsave(&chan->vc.lock, flags);
+ 
+-              if (desc) {
++              if (pt->ver != AE4_DMA_VERSION && desc) {
+                       if (desc->status != DMA_COMPLETE) {
+                               if (desc->status != DMA_ERROR)
+                                       desc->status = DMA_COMPLETE;
+@@ -235,7 +237,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
+ 
+               spin_unlock_irqrestore(&chan->vc.lock, flags);
+ 
+-              if (tx_desc) {
++              if (pt->ver != AE4_DMA_VERSION && tx_desc) {
+                       dmaengine_desc_get_callback_invoke(tx_desc, NULL);
+                       dma_run_dependencies(tx_desc);
+                       vchan_vdesc_fini(vd);
+@@ -245,11 +247,25 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
+       return NULL;
+ }
+ 
++static inline bool ae4_core_queue_full(struct pt_cmd_queue *cmd_q)
++{
++      u32 front_wi = readl(cmd_q->reg_control + AE4_WR_IDX_OFF);
++      u32 rear_ri = readl(cmd_q->reg_control + AE4_RD_IDX_OFF);
++
++      if (((MAX_CMD_QLEN + front_wi - rear_ri) % MAX_CMD_QLEN) >= (MAX_CMD_QLEN - 1))
++              return true;
++
++      return false;
++}
++
+ static void pt_cmd_callback(void *data, int err)
+ {
+       struct pt_dma_desc *desc = data;
++      struct ae4_cmd_queue *ae4cmd_q;
+       struct dma_chan *dma_chan;
+       struct pt_dma_chan *chan;
++      struct ae4_device *ae4;
++      struct pt_device *pt;
+       int ret;
+ 
+       if (err == -EINPROGRESS)
+@@ -257,11 +273,32 @@ static void pt_cmd_callback(void *data, int err)
+ 
+       dma_chan = desc->vd.tx.chan;
+       chan = to_pt_chan(dma_chan);
++      pt = chan->pt;
+ 
+       if (err)
+               desc->status = DMA_ERROR;
+ 
+       while (true) {
++              if (pt->ver == AE4_DMA_VERSION) {
++                      ae4 = container_of(pt, struct ae4_device, pt);
++                      ae4cmd_q = &ae4->ae4cmd_q[chan->id];
++
++                      if (ae4cmd_q->q_cmd_count >= (CMD_Q_LEN - 1) ||
++                          ae4_core_queue_full(&ae4cmd_q->cmd_q)) {
++                              wake_up(&ae4cmd_q->q_w);
++
++                              if (wait_for_completion_timeout(&ae4cmd_q->cmp,
++                                                              msecs_to_jiffies(AE4_TIME_OUT))
++                                                              == 0) {
++                                      dev_err(pt->dev, "TIMEOUT %d:\n", ae4cmd_q->id);
++                                      break;
++                              }
++
++                              reinit_completion(&ae4cmd_q->cmp);
++                              continue;
++                      }
++              }
++
+               /* Check for DMA descriptor completion */
+               desc = pt_handle_active_desc(chan, desc);
+ 
+@@ -296,6 +333,49 @@ static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan,
+       return desc;
+ }
+ 
++static void pt_cmd_callback_work(void *data, int err)
++{
++      struct dma_async_tx_descriptor *tx_desc;
++      struct pt_dma_desc *desc = data;
++      struct dma_chan *dma_chan;
++      struct virt_dma_desc *vd;
++      struct pt_dma_chan *chan;
++      unsigned long flags;
++
++      dma_chan = desc->vd.tx.chan;
++      chan = to_pt_chan(dma_chan);
++
++      if (err == -EINPROGRESS)
++              return;
++
++      tx_desc = &desc->vd.tx;
++      vd = &desc->vd;
++
++      if (err)
++              desc->status = DMA_ERROR;
++
++      spin_lock_irqsave(&chan->vc.lock, flags);
++      if (desc) {
++              if (desc->status != DMA_COMPLETE) {
++                      if (desc->status != DMA_ERROR)
++                              desc->status = DMA_COMPLETE;
++
++                      dma_cookie_complete(tx_desc);
++                      dma_descriptor_unmap(tx_desc);
++              } else {
++                      tx_desc = NULL;
++              }
++      }
++      spin_unlock_irqrestore(&chan->vc.lock, flags);
++
++      if (tx_desc) {
++              dmaengine_desc_get_callback_invoke(tx_desc, NULL);
++              dma_run_dependencies(tx_desc);
++              list_del(&desc->vd.node);
++              vchan_vdesc_fini(vd);
++      }
++}
++
+ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
+                                         dma_addr_t dst,
+                                         dma_addr_t src,
+@@ -327,6 +407,7 @@ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
+       desc->len = len;
+ 
+       if (pt->ver == AE4_DMA_VERSION) {
++              pt_cmd->pt_cmd_callback = pt_cmd_callback_work;
+               ae4 = container_of(pt, struct ae4_device, pt);
+               ae4cmd_q = &ae4->ae4cmd_q[chan->id];
+               mutex_lock(&ae4cmd_q->cmd_lock);
+@@ -367,13 +448,16 @@ static void pt_issue_pending(struct dma_chan *dma_chan)
+ {
+       struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+       struct pt_dma_desc *desc;
++      struct pt_device *pt;
+       unsigned long flags;
+       bool engine_is_idle = true;
+ 
++      pt = chan->pt;
++
+       spin_lock_irqsave(&chan->vc.lock, flags);
+ 
+       desc = pt_next_dma_desc(chan);
+-      if (desc)
++      if (desc && pt->ver != AE4_DMA_VERSION)
+               engine_is_idle = false;
+ 
+       vchan_issue_pending(&chan->vc);
-- 
2.39.5


