amdgpu_pasid_free_delayed() registers a callback on the VM root BO's
BOOKKEEP singleton fence. That fence can include the TLB flush fence;
when amdgpu_tlb_fence_work() signals it, dma_fence_signal() holds the
fence spinlock and may run nested dma_fence_chain work under irq-safe
locks. However, amdgpu_pasid_free() takes xa_lock, which lockdep treats
as irq-unsafe relative to that chain. Invoking amdgpu_pasid_free()
directly from the fence callback therefore creates a HARDIRQ-safe ->
HARDIRQ-unsafe lock-order inversion, which lockdep reports as follows:
[ 4127.396125] WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected
[ 4127.396400] 6.19.0-custom #16 Tainted: G U OE
[ 4127.396633] -----------------------------------------------------
[ 4127.396885] kworker/11:1/3530 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
[ 4127.397173] ffffffffc1a6ea78 (amdgpu_pasid_xa.xa_lock){+.+.}-{3:3}, at:
xa_erase+0x17/0x40
[ 4127.397525]
and this task is already holding:
[ 4127.397767] ffff8e639340cfb8 (&f->lock){....}-{3:3}, at:
dma_fence_signal+0x2a/0x80
[ 4127.398095] which would create a new lock dependency:
[ 4127.398312] (&f->lock){....}-{3:3} -> (amdgpu_pasid_xa.xa_lock){+.+.}-{3:3}
[ 4127.398619]
but this new dependency connects a HARDIRQ-irq-safe lock:
[ 4127.398944] (&chain->lock){-...}-{3:3}
[ 4127.398951]
... which became HARDIRQ-irq-safe at:
[ 4127.399369] lock_acquire+0xc6/0x2c0
[ 4127.399531] _raw_spin_lock_irqsave+0x54/0xa0
[ 4127.399720] dma_fence_signal+0x2a/0x80
[ 4127.399889] dma_fence_chain_irq_work+0x50/0x70
[ 4127.400085] irq_work_single+0x49/0xa0
[ 4127.400253] irq_work_run_list+0x30/0x50
[ 4127.400425] irq_work_run+0x1c/0x40
[ 4127.400579] __sysvec_irq_work+0x38/0x120
[ 4127.400759] sysvec_irq_work+0x7e/0x90
[ 4127.400925] asm_sysvec_irq_work+0x1f/0x30
[ 4127.401105] _raw_spin_unlock_irqrestore+0x3b/0x60
[ 4127.401310] dma_fence_signal+0x48/0x80
[ 4127.401479] amdgpu_tlb_fence_work+0x8d/0x130 [amdgpu]
[ 4127.402127] process_one_work+0x233/0x650
[ 4127.402312] worker_thread+0x1b2/0x360
[ 4127.402477] kthread+0x11c/0x260
[ 4127.402622] ret_from_fork+0x29f/0x2f0
[ 4127.402790] ret_from_fork_asm+0x1a/0x30
[ 4127.402963]
to a HARDIRQ-irq-unsafe lock:
[ 4127.403193] (amdgpu_pasid_xa.xa_lock){+.+.}-{3:3}
[ 4127.403200]
... which became HARDIRQ-irq-unsafe at:
[ 4127.403662] ...
[ 4127.403665] lock_acquire+0xc6/0x2c0
[ 4127.403904] _raw_spin_lock+0x39/0x80
[ 4127.404069] amdgpu_pasid_alloc+0x7f/0x150 [amdgpu]
[ 4127.404681] amdgpu_driver_open_kms+0x9b/0x3b0 [amdgpu]
[ 4127.405281] drm_file_alloc+0x212/0x300 [drm]
[ 4127.405519] drm_client_init+0x84/0x130 [drm]
[ 4127.405749] amdgpu_amdkfd_drm_client_create+0x5f/0xa0 [amdgpu]
[ 4127.406449] amdgpu_pci_probe+0x470/0x6a0 [amdgpu]
[ 4127.407040] local_pci_probe+0x4f/0xb0
[ 4127.407209] pci_device_probe+0xdb/0x230
[ 4127.407382] really_probe+0xe5/0x3e0
[ 4127.407541] __driver_probe_device+0x7e/0x170
[ 4127.407731] driver_probe_device+0x23/0xa0
[ 4127.407910] __driver_attach+0xef/0x210
[ 4127.408079] bus_for_each_dev+0x83/0xd0
[ 4127.408249] driver_attach+0x22/0x30
[ 4127.408407] bus_add_driver+0x12d/0x250
[ 4127.408575] driver_register+0x68/0x130
[ 4127.408745] __pci_register_driver+0x81/0x90
[ 4127.408931] 0xffffffffc0bfd06b
[ 4127.409075] do_one_initcall+0x61/0x390
[ 4127.409245] do_init_module+0x6a/0x280
[ 4127.409411] load_module+0x22c5/0x23a0
[ 4127.409576] init_module_from_file+0xdf/0x100
[ 4127.409765] idempotent_init_module+0x1a3/0x2b0
[ 4127.409962] __x64_sys_finit_module+0x77/0xf0
[ 4127.410151] x64_sys_call+0x190a/0x21b0
[ 4127.410324] do_syscall_64+0x6f/0x760
[ 4127.410487] entry_SYSCALL_64_after_hwframe+0x76/0x7e
Signed-off-by: Prike Liang <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 29 +++++++++++++++++++++----
1 file changed, 25 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 7b0afeddbb05..bcbd4652780e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -24,7 +24,7 @@
#include <linux/xarray.h>
#include <linux/dma-fence-array.h>
-
+#include <linux/workqueue.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
@@ -45,6 +45,8 @@ static u32 amdgpu_pasid_next;
/* Helper to free pasid from a fence callback */
struct amdgpu_pasid_cb {
struct dma_fence_cb cb;
+ struct work_struct work;
+ struct dma_fence *fence;
u32 pasid;
};
@@ -89,15 +91,34 @@ void amdgpu_pasid_free(u32 pasid)
xa_erase(&amdgpu_pasid_xa, pasid);
}
+static void amdgpu_pasid_free_work(struct work_struct *work)
+{
+ struct amdgpu_pasid_cb *cb = container_of(work, struct amdgpu_pasid_cb,
+ work);
+ amdgpu_pasid_free(cb->pasid);
+ /* put the fence referenced by amdgpu_pasid_free_cb()*/
+ dma_fence_put(cb->fence);
+ /* put the fence referenced by amdgpu_pasid_free_delayed()*/
+ dma_fence_put(cb->fence);
+ kfree(cb);
+}
+
static void amdgpu_pasid_free_cb(struct dma_fence *fence,
struct dma_fence_cb *_cb)
{
struct amdgpu_pasid_cb *cb =
container_of(_cb, struct amdgpu_pasid_cb, cb);
- amdgpu_pasid_free(cb->pasid);
- dma_fence_put(fence);
- kfree(cb);
+ /*
+ * dma_fence_signal() holds the signaled fence's lock and may run under
+ * dma_fence_chain irq_work (irq-safe locks). xa_lock is
+ * irq-unsafe; taking it here violates lock order and can deadlock.
+ * Defer PASID free and fence refcount drops to process context.
+ */
+ cb->fence = fence;
+ dma_fence_get(fence);
+ INIT_WORK(&cb->work, amdgpu_pasid_free_work);
+ queue_work(system_unbound_wq, &cb->work);
}
/**
--
2.34.1