From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeon/r600_pipe_common.c | 35 ++++++++++++++++++++++++++- src/gallium/drivers/radeon/r600_pipe_common.h | 4 ++- src/gallium/drivers/radeon/r600_query.c | 7 +++--- src/gallium/drivers/radeonsi/si_perfcounter.c | 2 +- src/gallium/drivers/radeonsi/si_state_draw.c | 4 +-- 5 files changed, 44 insertions(+), 8 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 960b59c..b28f385 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -96,28 +96,52 @@ void radeon_shader_binary_clean(struct ac_shader_binary *b) * \param data_sel 1 = fence, 3 = timestamp * \param buf Buffer * \param va GPU address * \param old_value Previous fence value (for a bug workaround) * \param new_value Fence value to write for this event. */ void r600_gfx_write_event_eop(struct r600_common_context *ctx, unsigned event, unsigned event_flags, unsigned data_sel, struct r600_resource *buf, uint64_t va, - uint32_t old_fence, uint32_t new_fence) + uint32_t old_fence, uint32_t new_fence, + unsigned query_type) { struct radeon_winsys_cs *cs = ctx->gfx.cs; unsigned op = EVENT_TYPE(event) | EVENT_INDEX(5) | event_flags; if (ctx->chip_class >= GFX9) { + /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion + * counters) must immediately precede every timestamp event to + * prevent a GPU hang on GFX9. + * + * Occlusion queries don't need to do it here, because they + * always do ZPASS_DONE before the timestamp. + */ + if (ctx->chip_class == GFX9 && + query_type != PIPE_QUERY_OCCLUSION_COUNTER && + query_type != PIPE_QUERY_OCCLUSION_PREDICATE) { + struct r600_resource *scratch = ctx->eop_bug_scratch; + + assert(16 * ctx->screen->info.num_render_backends <= + scratch->b.b.width0); + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); + radeon_emit(cs, scratch->gpu_address); + radeon_emit(cs, scratch->gpu_address >> 32); + + radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch, + RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); + } + radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0)); radeon_emit(cs, op); radeon_emit(cs, EOP_DATA_SEL(data_sel)); radeon_emit(cs, va); /* address lo */ radeon_emit(cs, va >> 32); /* address hi */ radeon_emit(cs, new_fence); /* immediate data lo */ radeon_emit(cs, 0); /* immediate data hi */ radeon_emit(cs, 0); /* unused */ } else { if (ctx->chip_class == CIK || @@ -648,20 +672,28 @@ bool r600_common_context_init(struct r600_common_context *rctx, } rctx->b.set_device_reset_callback = r600_set_device_reset_callback; r600_init_context_texture_functions(rctx); r600_init_viewport_functions(rctx); r600_streamout_init(rctx); r600_query_init(rctx); cayman_init_msaa(&rctx->b); + if (rctx->chip_class == GFX9) { + rctx->eop_bug_scratch = (struct r600_resource*) + pipe_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT, + 16 * rscreen->info.num_render_backends); + if (!rctx->eop_bug_scratch) + return false; + } + rctx->allocator_zeroed_memory = u_suballocator_create(&rctx->b, rscreen->info.gart_page_size, 0, PIPE_USAGE_DEFAULT, 0, true); if (!rctx->allocator_zeroed_memory) return false; rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024, 0, PIPE_USAGE_STREAM); if (!rctx->b.stream_uploader) return false; @@ -717,20 +749,21 @@ void r600_common_context_cleanup(struct r600_common_context *rctx) u_upload_destroy(rctx->b.const_uploader); slab_destroy_child(&rctx->pool_transfers); slab_destroy_child(&rctx->pool_transfers_unsync); if (rctx->allocator_zeroed_memory) { u_suballocator_destroy(rctx->allocator_zeroed_memory); } rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL); rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL); + r600_resource_reference(&rctx->eop_bug_scratch, NULL); } /* * pipe_screen */ static const struct debug_named_value common_debug_options[] = { /* logging */ { "tex", DBG_TEX, "Print texture info" }, { "nir", DBG_NIR, "Enable experimental NIR shaders" }, diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 14bc63e..952fb77 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -553,20 +553,21 @@ struct r600_common_context { struct r600_common_screen *screen; struct radeon_winsys *ws; struct radeon_winsys_ctx *ctx; enum radeon_family family; enum chip_class chip_class; struct r600_ring gfx; struct r600_ring dma; struct pipe_fence_handle *last_gfx_fence; struct pipe_fence_handle *last_sdma_fence; + struct r600_resource *eop_bug_scratch; unsigned num_gfx_cs_flushes; unsigned initial_gfx_cs_size; unsigned gpu_reset_counter; unsigned last_dirty_tex_counter; unsigned last_compressed_colortex_counter; struct threaded_context *tc; struct u_suballocator *allocator_zeroed_memory; struct slab_child_pool pool_transfers; struct slab_child_pool pool_transfers_unsync; /* for threaded_context */ @@ -740,21 +741,22 @@ r600_invalidate_resource(struct pipe_context *ctx, struct pipe_resource *resource); void r600_replace_buffer_storage(struct pipe_context *ctx, struct pipe_resource *dst, struct pipe_resource *src); /* r600_common_pipe.c */ void r600_gfx_write_event_eop(struct r600_common_context *ctx, unsigned event, unsigned event_flags, unsigned data_sel, struct r600_resource *buf, uint64_t va, - uint32_t old_fence, uint32_t new_fence); + uint32_t old_fence, uint32_t new_fence, + unsigned query_type); unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen); void r600_gfx_wait_fence(struct r600_common_context *ctx, uint64_t va, uint32_t ref, uint32_t mask); void r600_draw_rectangle(struct blitter_context *blitter, int x1, int y1, int x2, int y2, float depth, enum blitter_attrib_type type, const union pipe_color_union *attrib); bool r600_common_screen_init(struct r600_common_screen *rscreen, struct radeon_winsys *ws); void r600_destroy_common_screen(struct r600_common_screen *rscreen); diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index 03ea04d..53b7955 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -773,21 +773,21 @@ static void r600_query_hw_do_emit_start(struct r600_common_context *ctx, COPY_DATA_DST_SEL(COPY_DATA_MEM_ASYNC)); radeon_emit(cs, 0); radeon_emit(cs, 0); radeon_emit(cs, va); radeon_emit(cs, va >> 32); } else { /* Write the timestamp after the last draw is done. * (bottom-of-pipe) */ r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, - 0, 3, NULL, va, 0, 0); + 0, 3, NULL, va, 0, 0, query->b.type); } break; case PIPE_QUERY_PIPELINE_STATISTICS: radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); radeon_emit(cs, va); radeon_emit(cs, va >> 32); break; default: assert(0); @@ -858,21 +858,21 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: va += 16; for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream) emit_sample_streamout(cs, va + 32 * stream, stream); break; case PIPE_QUERY_TIME_ELAPSED: va += 8; /* fall through */ case PIPE_QUERY_TIMESTAMP: r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, - 0, 3, NULL, va, 0, 0); + 0, 3, NULL, va, 0, 0, query->b.type); fence_va = va + 8; break; case PIPE_QUERY_PIPELINE_STATISTICS: { unsigned sample_size = (query->result_size - 8) / 2; va += sample_size; radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); radeon_emit(cs, va); radeon_emit(cs, va >> 32); @@ -881,21 +881,22 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx, break; } default: assert(0); } r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); if (fence_va) r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, 1, - query->buffer.buf, fence_va, 0, 0x80000000); + query->buffer.buf, fence_va, 0, 0x80000000, + query->b.type); } static void r600_query_hw_emit_stop(struct r600_common_context *ctx, struct r600_query_hw *query) { uint64_t va; if (!query->buffer.buf) return; // previous buffer allocation failure diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index 2e3a174..38aa9ad 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -608,21 +608,21 @@ static void si_pc_emit_start(struct r600_common_context *ctx, } /* Note: The buffer was already added in si_pc_emit_start, so we don't have to * do it again in here. */ static void si_pc_emit_stop(struct r600_common_context *ctx, struct r600_resource *buffer, uint64_t va) { struct radeon_winsys_cs *cs = ctx->gfx.cs; r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, 1, - buffer, va, 1, 0); + buffer, va, 1, 0, 0); r600_gfx_wait_fence(ctx, va, 0, 0xffffffff); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_STOP) | EVENT_INDEX(0)); radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, S_036020_PERFMON_STATE(V_036020_STOP_COUNTING) | S_036020_PERFMON_SAMPLE_ENABLE(1)); } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 6e65155..0961289 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -891,21 +891,21 @@ void si_emit_cache_flush(struct si_context *sctx) S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) | S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) | S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1); /* Necessary for DCC */ if (rctx->chip_class == VI) r600_gfx_write_event_eop(rctx, V_028A90_FLUSH_AND_INV_CB_DATA_TS, - 0, 0, NULL, 0, 0, 0); + 0, 0, NULL, 0, 0, 0, 0); } if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB) cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1); } if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) { /* Flush CMASK/FMASK/DCC. SURFACE_SYNC will wait for idle. */ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0)); @@ -993,21 +993,21 @@ void si_emit_cache_flush(struct si_context *sctx) sctx->b.num_L2_invalidates++; } /* Do the flush (enqueue the event and wait for it). */ va = sctx->wait_mem_scratch->gpu_address; sctx->wait_mem_number++; r600_gfx_write_event_eop(rctx, cb_db_event, tc_flags, 1, sctx->wait_mem_scratch, va, sctx->wait_mem_number - 1, - sctx->wait_mem_number); + sctx->wait_mem_number, 0); r600_gfx_wait_fence(rctx, va, sctx->wait_mem_number, 0xffffffff); } /* Make sure ME is idle (it executes most packets) before continuing. * This prevents read-after-write hazards between PFP and ME. */ if (cp_coher_cntl || (rctx->flags & (SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_INV_VMEM_L1 | SI_CONTEXT_INV_GLOBAL_L2 | -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev