On 6/6/25 18:08, Alex Deucher wrote: > On Fri, Jun 6, 2025 at 7:39 AM Christian König <christian.koe...@amd.com> > wrote: >> >> On 6/6/25 08:43, Alex Deucher wrote: >>> Use the amdgpu fence container so we can store additional >>> data in the fence. >>> >>> Signed-off-by: Alex Deucher <alexander.deuc...@amd.com> >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 30 +++++---------------- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 12 ++++----- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 2 +- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 16 +++++++++++ >>> 6 files changed, 32 insertions(+), 32 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c >>> index 8e626f50b362e..f81608330a3d0 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c >>> @@ -1902,7 +1902,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct >>> amdgpu_ring *ring) >>> continue; >>> } >>> job = to_amdgpu_job(s_job); >>> - if (preempted && (&job->hw_fence) == fence) >>> + if (preempted && (&job->hw_fence.base) == fence) >>> /* mark the job as preempted */ >>> job->preemption_status |= AMDGPU_IB_PREEMPTED; >>> } >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> index ea565651f7459..8298e95e4543e 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> @@ -6375,7 +6375,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device >>> *adev, >>> * >>> * job->base holds a reference to parent fence >>> */ >>> - if (job && dma_fence_is_signaled(&job->hw_fence)) { >>> + if (job && dma_fence_is_signaled(&job->hw_fence.base)) { >>> job_signaled = true; >>> dev_info(adev->dev, "Guilty job already signaled, skipping HW >>> reset"); >>> goto 
skip_hw_reset; >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c >>> index 2f24a6aa13bf6..569e0e5373927 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c >>> @@ -41,22 +41,6 @@ >>> #include "amdgpu_trace.h" >>> #include "amdgpu_reset.h" >>> >>> -/* >>> - * Fences mark an event in the GPUs pipeline and are used >>> - * for GPU/CPU synchronization. When the fence is written, >>> - * it is expected that all buffers associated with that fence >>> - * are no longer in use by the associated ring on the GPU and >>> - * that the relevant GPU caches have been flushed. >>> - */ >>> - >>> -struct amdgpu_fence { >>> - struct dma_fence base; >>> - >>> - /* RB, DMA, etc. */ >>> - struct amdgpu_ring *ring; >>> - ktime_t start_timestamp; >>> -}; >>> - >> >> Oh, that handling here is completely broken. >> >> The MCBP muxer overwrites fields in the job because of this ^^. >> >> I think that patch needs to be a bug fix we even backport. > > What is broken in the muxer code?
See amdgpu_fence_emit(), the code casts the fence to an amdgpu_fence and assigns start_timestamp. But the fence pointer isn't an amdgpu_fence at all, that is just a dma_fence embedded into a job object! We overwrite the gang submit pointer and the flags with that. The only thing preventing us from crashing is that those values are never used again after emitting the fence. Regards, Christian. > > Alex > >> >> Regards, >> Christian. >> >>> static struct kmem_cache *amdgpu_fence_slab; >>> >>> int amdgpu_fence_slab_init(void) >>> @@ -151,12 +135,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, >>> struct dma_fence **f, struct amd >>> am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC); >>> if (am_fence == NULL) >>> return -ENOMEM; >>> - fence = &am_fence->base; >>> - am_fence->ring = ring; >>> } else { >>> /* take use of job-embedded fence */ >>> - fence = &job->hw_fence; >>> + am_fence = &job->hw_fence; >>> } >>> + fence = &am_fence->base; >>> + am_fence->ring = ring; >>> >>> seq = ++ring->fence_drv.sync_seq; >>> if (job && job->job_run_counter) { >>> @@ -718,7 +702,7 @@ void amdgpu_fence_driver_clear_job_fences(struct >>> amdgpu_ring *ring) >>> * it right here or we won't be able to track them in >>> fence_drv >>> * and they will remain unsignaled during sa_bo free. 
>>> */ >>> - job = container_of(old, struct amdgpu_job, hw_fence); >>> + job = container_of(old, struct amdgpu_job, >>> hw_fence.base); >>> if (!job->base.s_fence && !dma_fence_is_signaled(old)) >>> dma_fence_signal(old); >>> RCU_INIT_POINTER(*ptr, NULL); >>> @@ -780,7 +764,7 @@ static const char >>> *amdgpu_fence_get_timeline_name(struct dma_fence *f) >>> >>> static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) >>> { >>> - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); >>> + struct amdgpu_job *job = container_of(f, struct amdgpu_job, >>> hw_fence.base); >>> >>> return (const char *)to_amdgpu_ring(job->base.sched)->name; >>> } >>> @@ -810,7 +794,7 @@ static bool amdgpu_fence_enable_signaling(struct >>> dma_fence *f) >>> */ >>> static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) >>> { >>> - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); >>> + struct amdgpu_job *job = container_of(f, struct amdgpu_job, >>> hw_fence.base); >>> >>> if >>> (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) >>> >>> amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); >>> @@ -845,7 +829,7 @@ static void amdgpu_job_fence_free(struct rcu_head *rcu) >>> struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); >>> >>> /* free job if fence has a parent job */ >>> - kfree(container_of(f, struct amdgpu_job, hw_fence)); >>> + kfree(container_of(f, struct amdgpu_job, hw_fence.base)); >>> } >>> >>> /** >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c >>> index acb21fc8b3ce5..ddb9d3269357c 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c >>> @@ -272,8 +272,8 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) >>> /* Check if any fences where initialized */ >>> if (job->base.s_fence && job->base.s_fence->finished.ops) >>> f = &job->base.s_fence->finished; 
>>> - else if (job->hw_fence.ops) >>> - f = &job->hw_fence; >>> + else if (job->hw_fence.base.ops) >>> + f = &job->hw_fence.base; >>> else >>> f = NULL; >>> >>> @@ -290,10 +290,10 @@ static void amdgpu_job_free_cb(struct drm_sched_job >>> *s_job) >>> amdgpu_sync_free(&job->explicit_sync); >>> >>> /* only put the hw fence if has embedded fence */ >>> - if (!job->hw_fence.ops) >>> + if (!job->hw_fence.base.ops) >>> kfree(job); >>> else >>> - dma_fence_put(&job->hw_fence); >>> + dma_fence_put(&job->hw_fence.base); >>> } >>> >>> void amdgpu_job_set_gang_leader(struct amdgpu_job *job, >>> @@ -322,10 +322,10 @@ void amdgpu_job_free(struct amdgpu_job *job) >>> if (job->gang_submit != &job->base.s_fence->scheduled) >>> dma_fence_put(job->gang_submit); >>> >>> - if (!job->hw_fence.ops) >>> + if (!job->hw_fence.base.ops) >>> kfree(job); >>> else >>> - dma_fence_put(&job->hw_fence); >>> + dma_fence_put(&job->hw_fence.base); >>> } >>> >>> struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job) >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h >>> index f2c049129661f..931fed8892cc1 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h >>> @@ -48,7 +48,7 @@ struct amdgpu_job { >>> struct drm_sched_job base; >>> struct amdgpu_vm *vm; >>> struct amdgpu_sync explicit_sync; >>> - struct dma_fence hw_fence; >>> + struct amdgpu_fence hw_fence; >>> struct dma_fence *gang_submit; >>> uint32_t preamble_status; >>> uint32_t preemption_status; >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >>> index b95b471107692..e1f25218943a4 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >>> @@ -127,6 +127,22 @@ struct amdgpu_fence_driver { >>> struct dma_fence **fences; >>> }; >>> >>> +/* >>> + * Fences mark an event in the GPUs pipeline and are used >>> + * for GPU/CPU 
synchronization. When the fence is written, >>> + * it is expected that all buffers associated with that fence >>> + * are no longer in use by the associated ring on the GPU and >>> + * that the relevant GPU caches have been flushed. >>> + */ >>> + >>> +struct amdgpu_fence { >>> + struct dma_fence base; >>> + >>> + /* RB, DMA, etc. */ >>> + struct amdgpu_ring *ring; >>> + ktime_t start_timestamp; >>> +}; >>> + >>> extern const struct drm_sched_backend_ops amdgpu_sched_ops; >>> >>> void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); >>