From: Marek Olšák <marek.ol...@amd.com> Rename si_gfx_wait_fence -> si_cp_wait_mem (adding a flags parameter) and si_gfx_write_event_eop -> si_cp_release_mem; si_gfx_write_fence_dwords likewise becomes si_cp_write_fence_dwords. --- src/amd/common/sid.h | 1 + src/gallium/drivers/radeonsi/si_fence.c | 32 +++++++++---------- src/gallium/drivers/radeonsi/si_perfcounter.c | 14 ++++---- src/gallium/drivers/radeonsi/si_pipe.h | 16 +++++----- src/gallium/drivers/radeonsi/si_query.c | 32 +++++++++---------- src/gallium/drivers/radeonsi/si_state_draw.c | 24 +++++++------- 6 files changed, 60 insertions(+), 59 deletions(-)
diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h index 3e36eb2d046..69b532177ac 100644 --- a/src/amd/common/sid.h +++ b/src/amd/common/sid.h @@ -139,20 +139,21 @@ #define V_370_MEM_ASYNC 5 #define R_371_DST_ADDR_LO 0x371 #define R_372_DST_ADDR_HI 0x372 #define PKT3_DRAW_INDEX_INDIRECT_MULTI 0x38 #define PKT3_MEM_SEMAPHORE 0x39 #define PKT3_MPEG_INDEX 0x3A /* not on CIK */ #define PKT3_WAIT_REG_MEM 0x3C #define WAIT_REG_MEM_EQUAL 3 #define WAIT_REG_MEM_NOT_EQUAL 4 #define WAIT_REG_MEM_MEM_SPACE(x) (((unsigned)(x) & 0x3) << 4) +#define WAIT_REG_MEM_PFP (1 << 8) #define PKT3_MEM_WRITE 0x3D /* not on CIK */ #define PKT3_INDIRECT_BUFFER_CIK 0x3F /* new on CIK */ #define R_3F0_IB_BASE_LO 0x3F0 #define R_3F1_IB_BASE_HI 0x3F1 #define R_3F2_CONTROL 0x3F2 #define S_3F2_IB_SIZE(x) (((unsigned)(x) & 0xfffff) << 0) #define G_3F2_IB_SIZE(x) (((unsigned)(x) >> 0) & 0xfffff) #define S_3F2_CHAIN(x) (((unsigned)(x) & 0x1) << 20) #define G_3F2_CHAIN(x) (((unsigned)(x) >> 20) & 0x1) #define S_3F2_VALID(x) (((unsigned)(x) & 0x1) << 23) diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c index 005fd9c1576..d1aa4544578 100644 --- a/src/gallium/drivers/radeonsi/si_fence.c +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -57,25 +57,25 @@ struct si_multi_fence { * Write an EOP event. * * \param event EVENT_TYPE_* * \param event_flags Optional cache flush flags (TC) * \param data_sel 1 = fence, 3 = timestamp * \param buf Buffer * \param va GPU address * \param old_value Previous fence value (for a bug workaround) * \param new_value Fence value to write for this event. 
*/ -void si_gfx_write_event_eop(struct si_context *ctx, - unsigned event, unsigned event_flags, - unsigned dst_sel, unsigned int_sel, unsigned data_sel, - struct r600_resource *buf, uint64_t va, - uint32_t new_fence, unsigned query_type) +void si_cp_release_mem(struct si_context *ctx, + unsigned event, unsigned event_flags, + unsigned dst_sel, unsigned int_sel, unsigned data_sel, + struct r600_resource *buf, uint64_t va, + uint32_t new_fence, unsigned query_type) { struct radeon_cmdbuf *cs = ctx->gfx_cs; unsigned op = EVENT_TYPE(event) | EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) | event_flags; unsigned sel = EOP_DST_SEL(dst_sel) | EOP_INT_SEL(int_sel) | EOP_DATA_SEL(data_sel); @@ -140,38 +140,38 @@ void si_gfx_write_event_eop(struct si_context *ctx, radeon_emit(cs, new_fence); /* immediate data */ radeon_emit(cs, 0); /* unused */ } if (buf) { radeon_add_to_buffer_list(ctx, ctx->gfx_cs, buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); } } -unsigned si_gfx_write_fence_dwords(struct si_screen *screen) +unsigned si_cp_write_fence_dwords(struct si_screen *screen) { unsigned dwords = 6; if (screen->info.chip_class == CIK || screen->info.chip_class == VI) dwords *= 2; return dwords; } -void si_gfx_wait_fence(struct si_context *ctx, - uint64_t va, uint32_t ref, uint32_t mask) +void si_cp_wait_mem(struct si_context *ctx, + uint64_t va, uint32_t ref, uint32_t mask, unsigned flags) { struct radeon_cmdbuf *cs = ctx->gfx_cs; radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); + radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1) | flags); radeon_emit(cs, va); radeon_emit(cs, va >> 32); radeon_emit(cs, ref); /* reference value */ radeon_emit(cs, mask); /* mask */ radeon_emit(cs, 4); /* poll interval */ } static void si_add_fence_dependency(struct si_context *sctx, struct pipe_fence_handle *fence) { @@ -266,27 +266,27 @@ static void si_fine_fence_set(struct si_context *ctx, if 
(flags & PIPE_FLUSH_TOP_OF_PIPE) { struct radeon_cmdbuf *cs = ctx->gfx_cs; radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); radeon_emit(cs, fence_va); radeon_emit(cs, fence_va >> 32); radeon_emit(cs, 0x80000000); } else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) { - si_gfx_write_event_eop(ctx, - V_028A90_BOTTOM_OF_PIPE_TS, 0, - EOP_DST_SEL_MEM, - EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, - EOP_DATA_SEL_VALUE_32BIT, - NULL, fence_va, 0x80000000, - PIPE_QUERY_GPU_FINISHED); + si_cp_release_mem(ctx, + V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DST_SEL_MEM, + EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, + EOP_DATA_SEL_VALUE_32BIT, + NULL, fence_va, 0x80000000, + PIPE_QUERY_GPU_FINISHED); } else { assert(false); } } static boolean si_fence_finish(struct pipe_screen *screen, struct pipe_context *ctx, struct pipe_fence_handle *fence, uint64_t timeout) { diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index f3ef3d28c8a..2ca6d2d7410 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -573,26 +573,26 @@ static void si_pc_emit_start(struct si_context *sctx, S_036020_PERFMON_STATE(V_036020_START_COUNTING)); } /* Note: The buffer was already added in si_pc_emit_start, so we don't have to * do it again in here. 
*/ static void si_pc_emit_stop(struct si_context *sctx, struct r600_resource *buffer, uint64_t va) { struct radeon_cmdbuf *cs = sctx->gfx_cs; - si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, - EOP_DST_SEL_MEM, - EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, - EOP_DATA_SEL_VALUE_32BIT, - buffer, va, 0, SI_NOT_QUERY); - si_gfx_wait_fence(sctx, va, 0, 0xffffffff); + si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DST_SEL_MEM, + EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, + EOP_DATA_SEL_VALUE_32BIT, + buffer, va, 0, SI_NOT_QUERY); + si_cp_wait_mem(sctx, va, 0, 0xffffffff, 0); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0)); radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, S_036020_PERFMON_STATE(V_036020_STOP_COUNTING) | S_036020_PERFMON_SAMPLE_ENABLE(1)); } @@ -677,21 +677,21 @@ void si_init_perfcounters(struct si_screen *screen) /* This should not happen on non-SI chips. 
*/ fprintf(stderr, "si_init_perfcounters: max_sh_per_se = %d not " "supported (inaccurate performance counters)\n", screen->info.max_sh_per_se); } pc = CALLOC_STRUCT(si_perfcounters); if (!pc) return; - pc->num_stop_cs_dwords = 14 + si_gfx_write_fence_dwords(screen); + pc->num_stop_cs_dwords = 14 + si_cp_write_fence_dwords(screen); pc->num_instance_cs_dwords = 3; pc->num_shader_types = ARRAY_SIZE(si_pc_shader_type_bits); pc->shader_type_suffixes = si_pc_shader_type_suffixes; pc->shader_type_bits = si_pc_shader_type_bits; pc->emit_instance = si_pc_emit_instance; pc->emit_shaders = si_pc_emit_shaders; pc->emit_select = si_pc_emit_select; pc->emit_start = si_pc_emit_start; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 73c54df3a03..bb851374c54 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1163,28 +1163,28 @@ void si_dma_emit_timestamp(struct si_context *sctx, struct r600_resource *dst, void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned clear_value); void si_need_dma_space(struct si_context *ctx, unsigned num_dw, struct r600_resource *dst, struct r600_resource *src); void si_flush_dma_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_handle **fence); void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned value); /* si_fence.c */ -void si_gfx_write_event_eop(struct si_context *ctx, - unsigned event, unsigned event_flags, - unsigned dst_sel, unsigned int_sel, unsigned data_sel, - struct r600_resource *buf, uint64_t va, - uint32_t new_fence, unsigned query_type); -unsigned si_gfx_write_fence_dwords(struct si_screen *screen); -void si_gfx_wait_fence(struct si_context *ctx, - uint64_t va, uint32_t ref, uint32_t mask); +void si_cp_release_mem(struct si_context *ctx, + unsigned event, unsigned event_flags, + unsigned dst_sel, 
unsigned int_sel, unsigned data_sel, + struct r600_resource *buf, uint64_t va, + uint32_t new_fence, unsigned query_type); +unsigned si_cp_write_fence_dwords(struct si_screen *screen); +void si_cp_wait_mem(struct si_context *ctx, + uint64_t va, uint32_t ref, uint32_t mask, unsigned flags); void si_init_fence_functions(struct si_context *ctx); void si_init_screen_fence_functions(struct si_screen *screen); struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx, struct tc_unflushed_batch_token *tc_token); /* si_get.c */ void si_init_screen_get_functions(struct si_screen *sscreen); /* si_gfx_cs.c */ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index 45c8e146ecf..9b09c74d48a 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -658,55 +658,55 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen, query->b.type = query_type; query->b.ops = &query_hw_ops; query->ops = &query_hw_default_hw_ops; switch (query_type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: query->result_size = 16 * sscreen->info.num_render_backends; query->result_size += 16; /* for the fence + alignment */ - query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(sscreen); + query->num_cs_dw_end = 6 + si_cp_write_fence_dwords(sscreen); break; case SI_QUERY_TIME_ELAPSED_SDMA: /* GET_GLOBAL_TIMESTAMP only works if the offset is a multiple of 32. 
*/ query->result_size = 64; query->num_cs_dw_end = 0; break; case PIPE_QUERY_TIME_ELAPSED: query->result_size = 24; - query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(sscreen); + query->num_cs_dw_end = 8 + si_cp_write_fence_dwords(sscreen); break; case PIPE_QUERY_TIMESTAMP: query->result_size = 16; - query->num_cs_dw_end = 8 + si_gfx_write_fence_dwords(sscreen); + query->num_cs_dw_end = 8 + si_cp_write_fence_dwords(sscreen); query->flags = SI_QUERY_HW_FLAG_NO_START; break; case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_SO_STATISTICS: case PIPE_QUERY_SO_OVERFLOW_PREDICATE: /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ query->result_size = 32; query->num_cs_dw_end = 6; query->stream = index; break; case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ query->result_size = 32 * SI_MAX_STREAMS; query->num_cs_dw_end = 6 * SI_MAX_STREAMS; break; case PIPE_QUERY_PIPELINE_STATISTICS: /* 11 values on GCN. 
*/ query->result_size = 11 * 16; query->result_size += 8; /* for the fence + alignment */ - query->num_cs_dw_end = 6 + si_gfx_write_fence_dwords(sscreen); + query->num_cs_dw_end = 6 + si_cp_write_fence_dwords(sscreen); break; default: assert(0); FREE(query); return NULL; } if (!si_query_hw_init(sscreen, query)) { FREE(query); return NULL; @@ -883,25 +883,25 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx, break; case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE: va += 16; for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream) emit_sample_streamout(cs, va + 32 * stream, stream); break; case PIPE_QUERY_TIME_ELAPSED: va += 8; /* fall through */ case PIPE_QUERY_TIMESTAMP: - si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS, - 0, EOP_DST_SEL_MEM, - EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, - EOP_DATA_SEL_TIMESTAMP, NULL, va, - 0, query->b.type); + si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, + 0, EOP_DST_SEL_MEM, + EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, + EOP_DATA_SEL_TIMESTAMP, NULL, va, + 0, query->b.type); fence_va = va + 8; break; case PIPE_QUERY_PIPELINE_STATISTICS: { unsigned sample_size = (query->result_size - 8) / 2; va += sample_size; radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2)); radeon_emit(cs, va); radeon_emit(cs, va >> 32); @@ -909,26 +909,26 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx, fence_va = va + sample_size; break; } default: assert(0); } radeon_add_to_buffer_list(sctx, sctx->gfx_cs, query->buffer.buf, RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); if (fence_va) { - si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, - EOP_DST_SEL_MEM, - EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, - EOP_DATA_SEL_VALUE_32BIT, - query->buffer.buf, fence_va, 0x80000000, - query->b.type); + si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DST_SEL_MEM, + EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, + EOP_DATA_SEL_VALUE_32BIT, + query->buffer.buf, 
fence_va, 0x80000000, + query->b.type); } } static void si_query_hw_emit_stop(struct si_context *sctx, struct si_query_hw *query) { uint64_t va; if (!query->buffer.buf) return; // previous buffer allocation failure @@ -1573,21 +1573,21 @@ static void si_query_hw_get_result_resource(struct si_context *sctx, if (wait && qbuf == &query->buffer) { uint64_t va; /* Wait for result availability. Wait only for readiness * of the last entry, since the fence writes should be * serialized in the CP. */ va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size; va += params.fence_offset; - si_gfx_wait_fence(sctx, va, 0x80000000, 0x80000000); + si_cp_wait_mem(sctx, va, 0x80000000, 0x80000000, 0); } sctx->b.launch_grid(&sctx->b, &grid); sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; } si_restore_qbo_state(sctx, &saved_state); pipe_resource_reference(&tmp_buffer, NULL); } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 81eb34d75e2..69f723e4e4a 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -911,25 +911,25 @@ void si_emit_cache_flush(struct si_context *sctx) S_0085F0_CB1_DEST_BASE_ENA(1) | S_0085F0_CB2_DEST_BASE_ENA(1) | S_0085F0_CB3_DEST_BASE_ENA(1) | S_0085F0_CB4_DEST_BASE_ENA(1) | S_0085F0_CB5_DEST_BASE_ENA(1) | S_0085F0_CB6_DEST_BASE_ENA(1) | S_0085F0_CB7_DEST_BASE_ENA(1); /* Necessary for DCC */ if (sctx->chip_class == VI) - si_gfx_write_event_eop(sctx, - V_028A90_FLUSH_AND_INV_CB_DATA_TS, - 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, - EOP_DATA_SEL_DISCARD, NULL, - 0, 0, SI_NOT_QUERY); + si_cp_release_mem(sctx, + V_028A90_FLUSH_AND_INV_CB_DATA_TS, + 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, + EOP_DATA_SEL_DISCARD, NULL, + 0, 0, SI_NOT_QUERY); } if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1); } if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) { /* Flush CMASK/FMASK/DCC. 
SURFACE_SYNC will wait for idle. */ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0)); @@ -1028,27 +1028,27 @@ void si_emit_cache_flush(struct si_context *sctx) flags &= ~(SI_CONTEXT_INV_GLOBAL_L2 | SI_CONTEXT_WRITEBACK_GLOBAL_L2 | SI_CONTEXT_INV_VMEM_L1); sctx->num_L2_invalidates++; } /* Do the flush (enqueue the event and wait for it). */ va = sctx->wait_mem_scratch->gpu_address; sctx->wait_mem_number++; - si_gfx_write_event_eop(sctx, cb_db_event, tc_flags, - EOP_DST_SEL_MEM, - EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, - EOP_DATA_SEL_VALUE_32BIT, - sctx->wait_mem_scratch, va, - sctx->wait_mem_number, SI_NOT_QUERY); - si_gfx_wait_fence(sctx, va, sctx->wait_mem_number, 0xffffffff); + si_cp_release_mem(sctx, cb_db_event, tc_flags, + EOP_DST_SEL_MEM, + EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, + EOP_DATA_SEL_VALUE_32BIT, + sctx->wait_mem_scratch, va, + sctx->wait_mem_number, SI_NOT_QUERY); + si_cp_wait_mem(sctx, va, sctx->wait_mem_number, 0xffffffff, 0); } /* Make sure ME is idle (it executes most packets) before continuing. * This prevents read-after-write hazards between PFP and ME. */ if (cp_coher_cntl || (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_INV_VMEM_L1 | SI_CONTEXT_INV_GLOBAL_L2 | SI_CONTEXT_WRITEBACK_GLOBAL_L2))) { -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev