From: Nicolai Hähnle <nicolai.haeh...@amd.com> For bottom-of-pipe fences inside the gfx command stream. --- src/gallium/drivers/radeon/r600_pipe_common.c | 52 +++++++++++++++++++++++++++ src/gallium/drivers/radeon/r600_pipe_common.h | 5 +++ src/gallium/drivers/radeonsi/si_perfcounter.c | 41 ++------------------- 3 files changed, 60 insertions(+), 38 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index f0fdc9b..2af4b41 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -73,20 +73,72 @@ void radeon_shader_binary_clean(struct radeon_shader_binary *b) FREE(b->global_symbol_offsets); FREE(b->relocs); FREE(b->disasm_string); FREE(b->llvm_ir_string); } /* * pipe_context */ +void r600_gfx_write_fence(struct r600_common_context *ctx, + uint64_t va, uint32_t old_value, uint32_t new_value) +{ + struct radeon_winsys_cs *cs = ctx->gfx.cs; + + if (ctx->chip_class == CIK) { + /* Two EOP events are required to make all engines go idle + * (and optional cache flushes executed) before the timestamp + * is written. + */ + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) | + EVENT_INDEX(5)); + radeon_emit(cs, va); + radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1)); + radeon_emit(cs, old_value); /* immediate data */ + radeon_emit(cs, 0); /* unused */ + } + + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) | + EVENT_INDEX(5)); + radeon_emit(cs, va); + radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1)); + radeon_emit(cs, new_value); /* immediate data */ + radeon_emit(cs, 0); /* unused */ +} + +unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen) +{ + unsigned dwords = 6; + + if (screen->chip_class == CIK) + dwords *= 2; + + return dwords; +} + +void r600_gfx_wait_fence(struct r600_common_context *ctx, + uint64_t va, uint32_t ref, uint32_t mask) +{ + struct radeon_winsys_cs *cs = ctx->gfx.cs; + + radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); + radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, ref); /* reference value */ + radeon_emit(cs, mask); /* mask */ + radeon_emit(cs, 4); /* poll interval */ +} + void r600_draw_rectangle(struct blitter_context *blitter, int x1, int y1, int x2, int y2, float depth, enum blitter_attrib_type type, const union pipe_color_union *attrib) { struct r600_common_context *rctx = (struct r600_common_context*)util_blitter_get_pipe(blitter); struct pipe_viewport_state viewport; struct pipe_resource *buf = NULL; unsigned offset = 0; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index dd33eab..96b23b2 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -688,20 +688,25 @@ struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen, unsigned alignment); struct pipe_resource * r600_buffer_from_user_memory(struct pipe_screen *screen, const struct pipe_resource *templ, void *user_memory); void r600_invalidate_resource(struct pipe_context *ctx, struct pipe_resource *resource); /* r600_common_pipe.c */ +void r600_gfx_write_fence(struct r600_common_context *ctx, + uint64_t va, uint32_t old_value, uint32_t new_value); +unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen); +void r600_gfx_wait_fence(struct r600_common_context *ctx, + uint64_t va, uint32_t ref, uint32_t mask); void r600_draw_rectangle(struct blitter_context *blitter, int x1, int y1, int x2, int y2, float depth, enum blitter_attrib_type type, const union pipe_color_union *attrib); bool r600_common_screen_init(struct r600_common_screen *rscreen, struct radeon_winsys *ws); void r600_destroy_common_screen(struct r600_common_screen *rscreen); void r600_preflush_suspend_features(struct r600_common_context *ctx); void r600_postflush_resume_features(struct r600_common_context *ctx); bool r600_common_context_init(struct r600_common_context *rctx, diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index 0ced617..d0c5392 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -584,53 +584,22 @@ static void si_pc_emit_start(struct r600_common_context *ctx, S_036020_PERFMON_STATE(V_036020_START_COUNTING)); } /* Note: The buffer was already added in si_pc_emit_start, so we don't have to * do it again in here. */ static void si_pc_emit_stop(struct r600_common_context *ctx, struct r600_resource *buffer, uint64_t va) { struct radeon_winsys_cs *cs = ctx->gfx.cs; - if (ctx->screen->chip_class == CIK) { - /* Two EOP events are required to make all engines go idle - * (and optional cache flushes executed) before the timestamp - * is written. - * - * Write 1, because we need to wait for the second EOP event. - */ - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) | - EVENT_INDEX(5)); - radeon_emit(cs, va); - radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1)); - radeon_emit(cs, 1); /* immediate data */ - radeon_emit(cs, 0); /* unused */ - } - - /* Write 0. */ - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) | - EVENT_INDEX(5)); - radeon_emit(cs, va); - radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1)); - radeon_emit(cs, 0); /* immediate data */ - radeon_emit(cs, 0); /* unused */ - - /* Wait until the memory location is 0. */ - radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, 0); /* reference value */ - radeon_emit(cs, 0xffffffff); /* mask */ - radeon_emit(cs, 4); /* poll interval */ + r600_gfx_write_fence(ctx, va, 1, 0); + r600_gfx_wait_fence(ctx, va, 0, 0xffffffff); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_STOP) | EVENT_INDEX(0)); radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, S_036020_PERFMON_STATE(V_036020_STOP_COUNTING) | S_036020_PERFMON_SAMPLE_ENABLE(1)); } @@ -712,28 +681,24 @@ void si_init_perfcounters(struct si_screen *screen) fprintf(stderr, "si_init_perfcounters: max_sh_per_se = %d not " "supported (inaccurate performance counters)\n", screen->b.info.max_sh_per_se); } pc = CALLOC_STRUCT(r600_perfcounters); if (!pc) return; pc->num_start_cs_dwords = 14; - pc->num_stop_cs_dwords = 20; + pc->num_stop_cs_dwords = 14 + r600_gfx_write_fence_dwords(&screen->b); pc->num_instance_cs_dwords = 3; pc->num_shaders_cs_dwords = 4; - if (screen->b.chip_class == CIK) { - pc->num_stop_cs_dwords += 6; - } - pc->num_shader_types = ARRAY_SIZE(si_pc_shader_type_bits); pc->shader_type_suffixes = si_pc_shader_type_suffixes; pc->shader_type_bits = si_pc_shader_type_bits; pc->get_size = si_pc_get_size; pc->emit_instance = si_pc_emit_instance; pc->emit_shaders = si_pc_emit_shaders; pc->emit_select = si_pc_emit_select; pc->emit_start = si_pc_emit_start; pc->emit_stop = si_pc_emit_stop; -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev