On Tue, Jan 29, 2019, 3:01 AM Christian König <ckoenig.leichtzumer...@gmail.com> wrote:
> Am 28.01.19 um 22:52 schrieb Marek Olšák: > > From: Marek Olšák <marek.ol...@amd.com> > > > > Normal syncobjs signal when an IB finishes. Start syncobjs signal when > > an IB starts. > > That approach has quite a number of problems (for example you can't > allocate memory at this point). > > Better add a flag that we should only sync on scheduling for a > dependency/syncobj instead. > I don't understand. Can you give me an example of the interface and how the implementation would look? Thanks, Marek > Christian. > > > > > Signed-off-by: Marek Olšák <marek.ol...@amd.com> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + > > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 18 ++++++++++++++++++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- > > include/uapi/drm/amdgpu_drm.h | 13 ++++++++++++- > > 4 files changed, 33 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > index d67f8b1dfe80..8e2f7e558bc9 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > @@ -453,20 +453,21 @@ struct amdgpu_cs_parser { > > struct dma_fence *fence; > > uint64_t bytes_moved_threshold; > > uint64_t bytes_moved_vis_threshold; > > uint64_t bytes_moved; > > uint64_t bytes_moved_vis; > > struct amdgpu_bo_list_entry *evictable; > > > > /* user fence */ > > struct amdgpu_bo_list_entry uf_entry; > > > > + bool get_start_syncobj; > > unsigned num_post_dep_syncobjs; > > struct drm_syncobj **post_dep_syncobjs; > > }; > > > > static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, > > uint32_t ib_idx, int idx) > > { > > return p->job->ibs[ib_idx].ptr[idx]; > > } > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > index 1c49b8266d69..917f3818c61c 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > @@ -1022,20 +1022,23 @@ static int 
amdgpu_cs_ib_fill(struct > amdgpu_device *adev, > > r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type, > > chunk_ib->ip_instance, > chunk_ib->ring, > > &entity); > > if (r) > > return r; > > > > if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) > > parser->job->preamble_status |= > > AMDGPU_PREAMBLE_IB_PRESENT; > > > > + if (chunk_ib->flags & AMDGPU_IB_FLAG_GET_START_SYNCOBJ) > > + parser->get_start_syncobj = true; > > + > > if (parser->entity && parser->entity != entity) > > return -EINVAL; > > > > parser->entity = entity; > > > > ring = to_amdgpu_ring(entity->rq->sched); > > r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ? > > chunk_ib->ib_bytes : 0, ib); > > if (r) { > > DRM_ERROR("Failed to get ib !\n"); > > @@ -1227,20 +1230,35 @@ static int amdgpu_cs_submit(struct > amdgpu_cs_parser *p, > > amdgpu_mn_lock(p->mn); > > amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { > > struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); > > > > if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { > > r = -ERESTARTSYS; > > goto error_abort; > > } > > } > > > > + if (p->get_start_syncobj) { > > + struct drm_syncobj *syncobj; > > + > > + r = drm_syncobj_create(&syncobj, 0, > > + &job->base.s_fence->scheduled); > > + if (r) > > + goto error_abort; > > + > > + r = drm_syncobj_get_handle(p->filp, syncobj, > > + &cs->out.start_syncobj); > > + if (r) > > + goto error_abort; > > + drm_syncobj_put(syncobj); > > + } > > + > > job->owner = p->filp; > > p->fence = dma_fence_get(&job->base.s_fence->finished); > > > > amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq); > > amdgpu_cs_post_dependencies(p); > > > > if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) && > > !p->ctx->preamble_presented) { > > job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST; > > p->ctx->preamble_presented = true; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > index c806f984bcc5..a230a30722d4 100644 > > --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > > @@ -64,23 +64,24 @@ > > * - 3.18.0 - Export gpu always on cu bitmap > > * - 3.19.0 - Add support for UVD MJPEG decode > > * - 3.20.0 - Add support for local BOs > > * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl > > * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl > > * - 3.23.0 - Add query for VRAM lost counter > > * - 3.24.0 - Add high priority compute support for gfx9 > > * - 3.25.0 - Add support for sensor query info (stable pstate > sclk/mclk). > > * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. > > * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. > > + * - 3.28.0 - AMDGPU_IB_FLAG_GET_START_SYNCOBJ > > */ > > #define KMS_DRIVER_MAJOR 3 > > -#define KMS_DRIVER_MINOR 27 > > +#define KMS_DRIVER_MINOR 28 > > #define KMS_DRIVER_PATCHLEVEL 0 > > > > int amdgpu_vram_limit = 0; > > int amdgpu_vis_vram_limit = 0; > > int amdgpu_gart_size = -1; /* auto */ > > int amdgpu_gtt_size = -1; /* auto */ > > int amdgpu_moverate = -1; /* auto */ > > int amdgpu_benchmarking = 0; > > int amdgpu_testing = 0; > > int amdgpu_audio = -1; > > diff --git a/include/uapi/drm/amdgpu_drm.h > b/include/uapi/drm/amdgpu_drm.h > > index 662d379ea624..d0e0c99cea32 100644 > > --- a/include/uapi/drm/amdgpu_drm.h > > +++ b/include/uapi/drm/amdgpu_drm.h > > @@ -538,21 +538,23 @@ struct drm_amdgpu_cs_in { > > __u32 ctx_id; > > /** Handle of resource list associated with CS */ > > __u32 bo_list_handle; > > __u32 num_chunks; > > __u32 _pad; > > /** this points to __u64 * which point to cs chunks */ > > __u64 chunks; > > }; > > > > struct drm_amdgpu_cs_out { > > - __u64 handle; > > + __u64 handle; /* sequence number */ > > + __u32 start_syncobj; /* signalled when IB execution begins */ > > + __u32 _pad; > > }; > > > > union drm_amdgpu_cs { > > struct drm_amdgpu_cs_in in; > > struct drm_amdgpu_cs_out out; > > }; > > > > /* Specify flags to be used for IB */ > > > > /* This IB 
should be submitted to CE */ > > @@ -566,20 +568,29 @@ union drm_amdgpu_cs { > > > > /* The IB fence should do the L2 writeback but not invalidate any > shader > > * caches (L2/vL1/sL1/I$). */ > > #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) > > > > /* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before > PACKET3_INDIRECT_BUFFER. > > * This will reset wave ID counters for the IB. > > */ > > #define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) > > > > +/* The CS ioctl will return a syncobj representing when all IBs begin > > + * execution. If set, this applies to all IBs. The returned syncobj can > be > > + * used as an IB dependency for other IBs. > > + * > > + * This is used for GPU deadlock prevention when userspace uses mid-IB > fences > > + * to wait for mid-IB work on other rings. > > + */ > > +#define AMDGPU_IB_FLAG_GET_START_SYNCOBJ (1 << 5) > > + > > struct drm_amdgpu_cs_chunk_ib { > > __u32 _pad; > > /** AMDGPU_IB_FLAG_* */ > > __u32 flags; > > /** Virtual address to begin IB execution */ > > __u64 va_start; > > /** Size of submission */ > > __u32 ib_bytes; > > /** HW IP to submit to */ > > __u32 ip_type; > >
_______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx