On Wed., Jul. 17, 2019, 10:43 Christian König, < ckoenig.leichtzumer...@gmail.com> wrote:
> Am 17.07.19 um 16:27 schrieb Marek Olšák: > > > > On Wed., Jul. 17, 2019, 03:15 Christian König, < > ckoenig.leichtzumer...@gmail.com> wrote: > >> Am 17.07.19 um 02:06 schrieb Marek Olšák: >> > From: Marek Olšák <marek.ol...@amd.com> >> > >> > Hopefully we'll only use 1 gfx ring, because otherwise we'd have to have >> > separate GDS buffers for each gfx ring. >> > >> > This is a workaround to ensure stability of transform feedback. Shaders >> hang >> > waiting for a GDS instruction (ds_sub, not ds_ordered_count). >> > >> > The disadvantage is that compute IBs might get a different VMID, >> > because now gfx always has GDS and compute doesn't. >> >> So far compute is only using GWS, but I don't think that those >> reservations are a good idea in general. >> >> How severe is the ENOMEM problem you see with using an explicit GWS >> allocation? >> > > I guess you mean GDS or OA. > > > Yeah, just a typo. Compute is using GWS and we want to use GDS and OA here. > > There is no ENOMEM, it just hangs. I don't know why. The shader is waiting > for ds_sub and can't continue, but GDS is idle. > > > Well could it be because we don't correctly handle non zero offsets or > stuff like that? > I don't know what you mean. > Does it work with this hack when you don't allocate GDS/OA from the start? > (Just allocate it twice or something like this). > It's only allocated once by the kernel with this hack. Marek > Christian. > > > Marek > > >> Regards, >> Christian. 
>> >> > >> > Signed-off-by: Marek Olšák <marek.ol...@amd.com> >> > --- >> > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + >> > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 10 ++++++++++ >> > drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 6 ++++++ >> > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 20 ++++++++++++++++++++ >> > 4 files changed, 37 insertions(+) >> > >> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> > index 4b514a44184c..cbd55d061b72 100644 >> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h >> > @@ -456,20 +456,21 @@ struct amdgpu_cs_parser { >> > struct drm_file *filp; >> > struct amdgpu_ctx *ctx; >> > >> > /* chunks */ >> > unsigned nchunks; >> > struct amdgpu_cs_chunk *chunks; >> > >> > /* scheduler job object */ >> > struct amdgpu_job *job; >> > struct drm_sched_entity *entity; >> > + unsigned hw_ip; >> > >> > /* buffer objects */ >> > struct ww_acquire_ctx ticket; >> > struct amdgpu_bo_list *bo_list; >> > struct amdgpu_mn *mn; >> > struct amdgpu_bo_list_entry vm_pd; >> > struct list_head validated; >> > struct dma_fence *fence; >> > uint64_t bytes_moved_threshold; >> > uint64_t bytes_moved_vis_threshold; >> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> > index c691df6f7a57..9125cd69a124 100644 >> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >> > @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct >> amdgpu_cs_parser *p, >> > if (r) >> > goto error_validate; >> > >> > amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, >> > p->bytes_moved_vis); >> > >> > gds = p->bo_list->gds_obj; >> > gws = p->bo_list->gws_obj; >> > oa = p->bo_list->oa_obj; >> > >> > + if (p->hw_ip == AMDGPU_HW_IP_GFX) { >> > + /* Only gfx10 allocates these. 
*/ >> > + if (!gds) >> > + gds = p->adev->gds.gds_gfx_bo; >> > + if (!oa) >> > + oa = p->adev->gds.oa_gfx_bo; >> > + } >> > + >> > amdgpu_bo_list_for_each_entry(e, p->bo_list) { >> > struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); >> > >> > /* Make sure we use the exclusive slot for shared BOs */ >> > if (bo->prime_shared_count) >> > e->tv.num_shared = 0; >> > e->bo_va = amdgpu_vm_bo_find(vm, bo); >> > } >> > >> > if (gds) { >> > @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device >> *adev, >> > struct drm_amdgpu_cs_chunk_ib *chunk_ib; >> > struct drm_sched_entity *entity; >> > >> > chunk = &parser->chunks[i]; >> > ib = &parser->job->ibs[j]; >> > chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; >> > >> > if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) >> > continue; >> > >> > + parser->hw_ip = chunk_ib->ip_type; >> > + >> > if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && >> > (amdgpu_mcbp || amdgpu_sriov_vf(adev))) { >> > if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { >> > if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) >> > ce_preempt++; >> > else >> > de_preempt++; >> > } >> > >> > /* each GFX command submit allows 0 or 1 IB >> preemptible for CE & DE */ >> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h >> > index df8a23554831..0943b8e1d97e 100644 >> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h >> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h >> > @@ -26,20 +26,26 @@ >> > >> > struct amdgpu_ring; >> > struct amdgpu_bo; >> > >> > struct amdgpu_gds { >> > uint32_t gds_size; >> > uint32_t gws_size; >> > uint32_t oa_size; >> > uint32_t gds_compute_max_wave_id; >> > uint32_t vgt_gs_max_wave_id; >> > + >> > + /* Reserved OA for the gfx ring. 
(gfx10) */ >> > + uint32_t gds_gfx_reservation_size; >> > + uint32_t oa_gfx_reservation_size; >> > + struct amdgpu_bo *gds_gfx_bo; >> > + struct amdgpu_bo *oa_gfx_bo; >> > }; >> > >> > struct amdgpu_gds_reg_offset { >> > uint32_t mem_base; >> > uint32_t mem_size; >> > uint32_t gws; >> > uint32_t oa; >> > }; >> > >> > #endif /* __AMDGPU_GDS_H__ */ >> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c >> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c >> > index 618291df659b..3952754c04ff 100644 >> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c >> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c >> > @@ -1314,20 +1314,33 @@ static int gfx_v10_0_sw_init(void *handle) >> > >> > kiq = &adev->gfx.kiq; >> > r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); >> > if (r) >> > return r; >> > >> > r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd)); >> > if (r) >> > return r; >> > >> > + /* allocate reserved GDS resources for transform feedback */ >> > + r = amdgpu_bo_create_kernel(adev, >> adev->gds.gds_gfx_reservation_size, >> > + 4, AMDGPU_GEM_DOMAIN_GDS, >> > + &adev->gds.gds_gfx_bo, NULL, NULL); >> > + if (r) >> > + return r; >> > + >> > + r = amdgpu_bo_create_kernel(adev, >> adev->gds.oa_gfx_reservation_size, >> > + 1, AMDGPU_GEM_DOMAIN_OA, >> > + &adev->gds.oa_gfx_bo, NULL, NULL); >> > + if (r) >> > + return r; >> > + >> > /* allocate visible FB for rlc auto-loading fw */ >> > if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) >> { >> > r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev); >> > if (r) >> > return r; >> > } >> > >> > adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE; >> > >> > gfx_v10_0_gpu_early_init(adev); >> > @@ -1354,20 +1367,23 @@ static void gfx_v10_0_me_fini(struct >> amdgpu_device *adev) >> > amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj, >> > &adev->gfx.me.me_fw_gpu_addr, >> > (void **)&adev->gfx.me.me_fw_ptr); >> > } >> > >> > static int gfx_v10_0_sw_fini(void *handle) >> > { >> > int i; >> > struct amdgpu_device 
*adev = (struct amdgpu_device *)handle; >> > + amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); >> > + amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); >> > + >> > for (i = 0; i < adev->gfx.num_gfx_rings; i++) >> > amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); >> > for (i = 0; i < adev->gfx.num_compute_rings; i++) >> > amdgpu_ring_fini(&adev->gfx.compute_ring[i]); >> > >> > amdgpu_gfx_mqd_sw_fini(adev); >> > amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); >> > amdgpu_gfx_kiq_fini(adev); >> > >> > gfx_v10_0_pfp_fini(adev); >> > @@ -5181,20 +5197,24 @@ static void gfx_v10_0_set_gds_init(struct >> amdgpu_device *adev) >> > case CHIP_NAVI10: >> > default: >> > adev->gds.gds_size = 0x10000; >> > adev->gds.gds_compute_max_wave_id = 0x4ff; >> > adev->gds.vgt_gs_max_wave_id = 0x3ff; >> > break; >> > } >> > >> > adev->gds.gws_size = 64; >> > adev->gds.oa_size = 16; >> > + >> > + /* Reserved for transform feedback. */ >> > + adev->gds.gds_gfx_reservation_size = 256; >> > + adev->gds.oa_gfx_reservation_size = 4; >> > } >> > >> > static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct >> amdgpu_device *adev, >> > u32 bitmap) >> > { >> > u32 data; >> > >> > if (!bitmap) >> > return; >> > >> >> > _______________________________________________ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx > > >
_______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx