Instead mark fence as explicit in it's amdgpu_sync_entry.
v2:
Fix use after free bug and add new parameter description.
Signed-off-by: Andrey Grodzovsky <andrey.grodzov...@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 -
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 14 +++++++-------
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 26
++++++++++++--------------
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 23 +++++++++++++++++------
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 6 +++---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 ++++++++--------
7 files changed, 48 insertions(+), 40 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f8657c3..c56a986 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1121,7 +1121,6 @@ struct amdgpu_job {
struct amdgpu_vm *vm;
struct amdgpu_ring *ring;
struct amdgpu_sync sync;
- struct amdgpu_sync dep_sync;
struct amdgpu_sync sched_sync;
struct amdgpu_ib *ibs;
struct dma_fence *fence; /* the hw fence */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d15836b..b694d35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -788,7 +788,7 @@ static int amdgpu_bo_vm_update_pte(struct
amdgpu_cs_parser *p)
return r;
r = amdgpu_sync_fence(adev, &p->job->sync,
- fpriv->prt_va->last_pt_update);
+ fpriv->prt_va->last_pt_update, false);
if (r)
return r;
@@ -802,7 +802,7 @@ static int amdgpu_bo_vm_update_pte(struct
amdgpu_cs_parser *p)
return r;
f = bo_va->last_pt_update;
- r = amdgpu_sync_fence(adev, &p->job->sync, f);
+ r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
if (r)
return r;
}
@@ -825,7 +825,7 @@ static int amdgpu_bo_vm_update_pte(struct
amdgpu_cs_parser *p)
return r;
f = bo_va->last_pt_update;
- r = amdgpu_sync_fence(adev, &p->job->sync, f);
+ r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
if (r)
return r;
}
@@ -836,7 +836,7 @@ static int amdgpu_bo_vm_update_pte(struct
amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update);
+ r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false);
if (r)
return r;
@@ -1040,8 +1040,8 @@ static int amdgpu_cs_process_fence_dep(struct
amdgpu_cs_parser *p,
amdgpu_ctx_put(ctx);
return r;
} else if (fence) {
- r = amdgpu_sync_fence(p->adev, &p->job->dep_sync,
- fence);
+ r = amdgpu_sync_fence(p->adev, &p->job->sync, fence,
+ true);
dma_fence_put(fence);
amdgpu_ctx_put(ctx);
if (r)
@@ -1060,7 +1060,7 @@ static int
amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
if (r)
return r;
- r = amdgpu_sync_fence(p->adev, &p->job->dep_sync, fence);
+ r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
dma_fence_put(fence);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 659997b..0cf86eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -164,7 +164,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring,
unsigned num_ibs,
}
if (ring->funcs->emit_pipeline_sync && job &&
- ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
+ ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) ||
amdgpu_vm_need_pipeline_sync(ring, job))) {
need_pipe_sync = true;
dma_fence_put(tmp);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 18770a8..61fb934 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -60,7 +60,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev,
unsigned num_ibs,
(*job)->num_ibs = num_ibs;
amdgpu_sync_create(&(*job)->sync);
- amdgpu_sync_create(&(*job)->dep_sync);
amdgpu_sync_create(&(*job)->sched_sync);
(*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
@@ -104,7 +103,6 @@ static void amdgpu_job_free_cb(struct
amd_sched_job *s_job)
amdgpu_ring_priority_put(job->ring, s_job->s_priority);
dma_fence_put(job->fence);
amdgpu_sync_free(&job->sync);
- amdgpu_sync_free(&job->dep_sync);
amdgpu_sync_free(&job->sched_sync);
kfree(job);
}
@@ -115,7 +113,6 @@ void amdgpu_job_free(struct amdgpu_job *job)
dma_fence_put(job->fence);
amdgpu_sync_free(&job->sync);
- amdgpu_sync_free(&job->dep_sync);
amdgpu_sync_free(&job->sched_sync);
kfree(job);
}
@@ -149,17 +146,18 @@ static struct dma_fence
*amdgpu_job_dependency(struct amd_sched_job *sched_job,
{
struct amdgpu_job *job = to_amdgpu_job(sched_job);
struct amdgpu_vm *vm = job->vm;
-
- struct dma_fence *fence = amdgpu_sync_get_fence(&job->dep_sync);
+ bool explicit = false;
int r;
-
- if (amd_sched_dependency_optimized(fence, s_entity)) {
- r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence);
- if (r)
- DRM_ERROR("Error adding fence to sync (%d)\n", r);
+ struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync,
&explicit);
+
+ if (fence && explicit) {
+ if (amd_sched_dependency_optimized(fence, s_entity)) {
+ r = amdgpu_sync_fence(job->adev, &job->sched_sync,
fence, false);
+ if (r)
+ DRM_ERROR("Error adding fence to sync (%d)\n", r);
+ }
}
- if (!fence)
- fence = amdgpu_sync_get_fence(&job->sync);
+
while (fence == NULL && vm && !job->vm_id) {
struct amdgpu_ring *ring = job->ring;
@@ -169,7 +167,7 @@ static struct dma_fence
*amdgpu_job_dependency(struct amd_sched_job *sched_job,
if (r)
DRM_ERROR("Error getting VM ID (%d)\n", r);
- fence = amdgpu_sync_get_fence(&job->sync);
+ fence = amdgpu_sync_get_fence(&job->sync, NULL);
}
return fence;
@@ -190,7 +188,7 @@ static struct dma_fence *amdgpu_job_run(struct
amd_sched_job *sched_job)
finished = &job->base.s_fence->finished;
adev = job->adev;
- BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
+ BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL, NULL));
trace_amdgpu_sched_run_job(job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index a4bf21f..6c3dda1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -35,6 +35,7 @@
struct amdgpu_sync_entry {
struct hlist_node node;
struct dma_fence *fence;
+ bool explicit;
};
static struct kmem_cache *amdgpu_sync_slab;
@@ -141,7 +142,7 @@ static bool amdgpu_sync_add_later(struct
amdgpu_sync *sync, struct dma_fence *f)
*
*/
int amdgpu_sync_fence(struct amdgpu_device *adev, struct
amdgpu_sync *sync,
- struct dma_fence *f)
+ struct dma_fence *f, bool explicit)
{
struct amdgpu_sync_entry *e;
@@ -159,6 +160,8 @@ int amdgpu_sync_fence(struct amdgpu_device
*adev, struct amdgpu_sync *sync,
if (!e)
return -ENOMEM;
+ e->explicit = explicit;
+
hash_add(sync->fences, &e->node, f->context);
e->fence = dma_fence_get(f);
return 0;
@@ -189,7 +192,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
/* always sync to the exclusive fence */
f = reservation_object_get_excl(resv);
- r = amdgpu_sync_fence(adev, sync, f);
+ r = amdgpu_sync_fence(adev, sync, f, false);
if (explicit_sync)
return r;
@@ -220,7 +223,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
continue;
}
- r = amdgpu_sync_fence(adev, sync, f);
+ r = amdgpu_sync_fence(adev, sync, f, false);
if (r)
break;
}
@@ -232,12 +235,14 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
*
* @sync: the sync object
* @ring: optional ring to use for test
+ * @explicit: true if the next fence is explicit
*
* Returns the next fence not signaled yet without removing it from
the sync
* object.
*/
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
- struct amdgpu_ring *ring)
+ struct amdgpu_ring *ring,
+ bool *explicit)
{
struct amdgpu_sync_entry *e;
struct hlist_node *tmp;
@@ -247,6 +252,10 @@ struct dma_fence *amdgpu_sync_peek_fence(struct
amdgpu_sync *sync,
struct dma_fence *f = e->fence;
struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+
+ if (explicit)
+ *explicit = e->explicit;
+