From: Marek Olšák <marek.ol...@amd.com> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108824
Cc: 19.1 <mesa-sta...@lists.freedesktop.org> --- src/gallium/drivers/radeonsi/si_descriptors.c | 94 ++++++++++++------- src/gallium/drivers/radeonsi/si_pipe.h | 2 + src/gallium/drivers/radeonsi/si_state_draw.c | 9 +- 3 files changed, 72 insertions(+), 33 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 744fc9a15d7..6a4dcacc0f3 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1580,242 +1580,272 @@ void si_update_needs_color_decompress_masks(struct si_context *sctx) si_samplers_update_needs_color_decompress_mask(&sctx->samplers[i]); si_images_update_needs_color_decompress_mask(&sctx->images[i]); si_update_shader_needs_decompress_mask(sctx, i); } si_resident_handles_update_needs_color_decompress(sctx); } /* BUFFER DISCARD/INVALIDATION */ -/** Reset descriptors of buffer resources after \p buf has been invalidated. */ +/* Reset descriptors of buffer resources after \p buf has been invalidated. + * If buf == NULL, reset all descriptors. + */ static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers, unsigned descriptors_idx, unsigned slot_mask, struct pipe_resource *buf, enum radeon_bo_priority priority) { struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; unsigned mask = buffers->enabled_mask & slot_mask; while (mask) { unsigned i = u_bit_scan(&mask); - if (buffers->buffers[i] == buf) { - si_set_buf_desc_address(si_resource(buf), buffers->offsets[i], + struct pipe_resource *buffer = buffers->buffers[i]; + + if (buffer && (!buf || buffer == buf)) { + si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], descs->list + i*4); sctx->descriptors_dirty |= 1u << descriptors_idx; radeon_add_to_gfx_buffer_list_check_mem(sctx, - si_resource(buf), + si_resource(buffer), buffers->writable_mask & (1u << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, priority, true); } } } -/* Update all resource bindings where the buffer is bound, including +/* Update all buffer bindings where the buffer is bound, including * all resource descriptors. This is invalidate_buffer without - * the invalidation. */ + * the invalidation. + * + * If buf == NULL, update all buffer bindings. + */ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) { struct si_resource *buffer = si_resource(buf); unsigned i, shader; unsigned num_elems = sctx->vertex_elements ? sctx->vertex_elements->count : 0; /* We changed the buffer, now we need to bind it where the old one * was bound. This consists of 2 things: * 1) Updating the resource descriptor and dirtying it. * 2) Adding a relocation to the CS, so that it's usable. */ /* Vertex buffers. */ - if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) { + if (!buffer) { + if (num_elems) + sctx->vertex_buffers_dirty = true; + } else if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) { for (i = 0; i < num_elems; i++) { int vb = sctx->vertex_elements->vertex_buffer_index[i]; if (vb >= ARRAY_SIZE(sctx->vertex_buffer)) continue; if (!sctx->vertex_buffer[vb].buffer.resource) continue; if (sctx->vertex_buffer[vb].buffer.resource == buf) { sctx->vertex_buffers_dirty = true; break; } } } /* Streamout buffers. (other internal buffers can't be invalidated) */ - if (buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) { + if (!buffer || buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) { for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) { struct si_buffer_resources *buffers = &sctx->rw_buffers; struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS]; + struct pipe_resource *buffer = buffers->buffers[i]; - if (buffers->buffers[i] != buf) + if (!buffer || (buf && buffer != buf)) continue; - si_set_buf_desc_address(si_resource(buf), buffers->offsets[i], + si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], descs->list + i*4); sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS; radeon_add_to_gfx_buffer_list_check_mem(sctx, - buffer, RADEON_USAGE_WRITE, + si_resource(buffer), + RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER, true); /* Update the streamout state. */ if (sctx->streamout.begin_emitted) si_emit_streamout_end(sctx); sctx->streamout.append_bitmask = sctx->streamout.enabled_mask; si_streamout_buffers_dirty(sctx); } } /* Constant and shader buffers. */ - if (buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) { + if (!buffer || buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) { for (shader = 0; shader < SI_NUM_SHADERS; shader++) si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS), buf, sctx->const_and_shader_buffers[shader].priority_constbuf); } - if (buffer->bind_history & PIPE_BIND_SHADER_BUFFER) { + if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_BUFFER) { for (shader = 0; shader < SI_NUM_SHADERS; shader++) si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors_idx(shader), u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS), buf, sctx->const_and_shader_buffers[shader].priority); } - if (buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { + if (!buffer || buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { /* Texture buffers - update bindings. */ for (shader = 0; shader < SI_NUM_SHADERS; shader++) { struct si_samplers *samplers = &sctx->samplers[shader]; struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader); unsigned mask = samplers->enabled_mask; while (mask) { unsigned i = u_bit_scan(&mask); - if (samplers->views[i]->texture == buf) { + struct pipe_resource *buffer = samplers->views[i]->texture; + + if (buffer && (!buf || buffer == buf)) { unsigned desc_slot = si_get_sampler_slot(i); - si_set_buf_desc_address(si_resource(buf), + si_set_buf_desc_address(si_resource(buffer), samplers->views[i]->u.buf.offset, descs->list + desc_slot * 16 + 4); sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); - radeon_add_to_gfx_buffer_list_check_mem(sctx, - buffer, RADEON_USAGE_READ, - RADEON_PRIO_SAMPLER_BUFFER, - true); + radeon_add_to_gfx_buffer_list_check_mem( + sctx, si_resource(buffer), + RADEON_USAGE_READ, + RADEON_PRIO_SAMPLER_BUFFER, true); } } } } /* Shader images */ - if (buffer->bind_history & PIPE_BIND_SHADER_IMAGE) { + if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_IMAGE) { for (shader = 0; shader < SI_NUM_SHADERS; ++shader) { struct si_images *images = &sctx->images[shader]; struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader); unsigned mask = images->enabled_mask; while (mask) { unsigned i = u_bit_scan(&mask); + struct pipe_resource *buffer = images->views[i].resource; - if (images->views[i].resource == buf) { + if (buffer && (!buf || buffer == buf)) { unsigned desc_slot = si_get_image_slot(i); if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE) si_mark_image_range_valid(&images->views[i]); - si_set_buf_desc_address(si_resource(buf), + si_set_buf_desc_address(si_resource(buffer), images->views[i].u.buf.offset, descs->list + desc_slot * 8 + 4); sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); radeon_add_to_gfx_buffer_list_check_mem( - sctx, buffer, + sctx, si_resource(buffer), RADEON_USAGE_READWRITE, RADEON_PRIO_SAMPLER_BUFFER, true); } } } } /* Bindless texture handles */ - if (buffer->texture_handle_allocated) { + if (!buffer || buffer->texture_handle_allocated) { struct si_descriptors *descs = &sctx->bindless_descriptors; util_dynarray_foreach(&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) { struct pipe_sampler_view *view = (*tex_handle)->view; unsigned desc_slot = (*tex_handle)->desc_slot; + struct pipe_resource *buffer = view->texture; - if (view->texture == buf) { - si_set_buf_desc_address(buffer, + if (buffer && (!buf || buffer == buf)) { + si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset, descs->list + desc_slot * 16 + 4); (*tex_handle)->desc_dirty = true; sctx->bindless_descriptors_dirty = true; radeon_add_to_gfx_buffer_list_check_mem( - sctx, buffer, + sctx, si_resource(buffer), RADEON_USAGE_READ, RADEON_PRIO_SAMPLER_BUFFER, true); } } } /* Bindless image handles */ - if (buffer->image_handle_allocated) { + if (!buffer || buffer->image_handle_allocated) { struct si_descriptors *descs = &sctx->bindless_descriptors; util_dynarray_foreach(&sctx->resident_img_handles, struct si_image_handle *, img_handle) { struct pipe_image_view *view = &(*img_handle)->view; unsigned desc_slot = (*img_handle)->desc_slot; + struct pipe_resource *buffer = view->resource; - if (view->resource == buf) { + if (buffer && (!buf || buffer == buf)) { if (view->access & PIPE_IMAGE_ACCESS_WRITE) si_mark_image_range_valid(view); - si_set_buf_desc_address(buffer, + si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset, descs->list + desc_slot * 16 + 4); (*img_handle)->desc_dirty = true; sctx->bindless_descriptors_dirty = true; radeon_add_to_gfx_buffer_list_check_mem( - sctx, buffer, + sctx, si_resource(buffer), RADEON_USAGE_READWRITE, RADEON_PRIO_SAMPLER_BUFFER, true); } } } + + if (buffer) { + /* Do the same for other contexts. They will invoke this function + * with buffer == NULL. + */ + unsigned new_counter = p_atomic_inc_return(&sctx->screen->dirty_buf_counter); + + /* Skip the update for the current context, because we have already updated + * the buffer bindings. + */ + if (new_counter == sctx->last_dirty_buf_counter + 1) + sctx->last_dirty_buf_counter = new_counter; + } } static void si_upload_bindless_descriptor(struct si_context *sctx, unsigned desc_slot, unsigned num_dwords) { struct si_descriptors *desc = &sctx->bindless_descriptors; unsigned desc_slot_offset = desc_slot * 16; uint32_t *data; uint64_t va; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index d3ddb912245..949fa0755cb 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -519,20 +519,21 @@ struct si_screen { struct si_perfcounters *perfcounters; /* If pipe_screen wants to recompute and re-emit the framebuffer, * sampler, and image states of all contexts, it should atomically * increment this. * * Each context will compare this with its own last known value of * the counter before drawing and re-emit the states accordingly. */ unsigned dirty_tex_counter; + unsigned dirty_buf_counter; /* Atomically increment this counter when an existing texture's * metadata is enabled or disabled in a way that requires changing * contexts' compressed texture binding masks. */ unsigned compressed_colortex_counter; struct { /* Context flags to set so that all writes from earlier jobs * in the CP are seen by L2 clients. @@ -845,20 +846,21 @@ struct si_context { bool has_graphics; bool gfx_flush_in_progress:1; bool gfx_last_ib_is_busy:1; bool compute_is_busy:1; unsigned num_gfx_cs_flushes; unsigned initial_gfx_cs_size; unsigned gpu_reset_counter; unsigned last_dirty_tex_counter; + unsigned last_dirty_buf_counter; unsigned last_compressed_colortex_counter; unsigned last_num_draw_calls; unsigned flags; /* flush flags */ /* Current unaccounted memory usage. */ uint64_t vram; uint64_t gtt; /* Atoms (direct states). */ union si_state_atoms atoms; unsigned dirty_atoms; /* mask */ diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 8e01e1b35e1..d9dfef0a381 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1247,21 +1247,21 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i /* Emit draw states. */ si_emit_vs_state(sctx, info); si_emit_draw_registers(sctx, info, num_patches); } static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct si_context *sctx = (struct si_context *)ctx; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct pipe_resource *indexbuf = info->index.resource; - unsigned dirty_tex_counter; + unsigned dirty_tex_counter, dirty_buf_counter; enum pipe_prim_type rast_prim; unsigned index_size = info->index_size; unsigned index_offset = info->indirect ? info->start * index_size : 0; if (likely(!info->indirect)) { /* SI-CI treat instance_count==0 as instance_count==1. There is * no workaround for indirect draws, but we can at least skip * direct draws. */ if (unlikely(!info->instance_count)) @@ -1285,20 +1285,27 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i dirty_tex_counter = p_atomic_read(&sctx->screen->dirty_tex_counter); if (unlikely(dirty_tex_counter != sctx->last_dirty_tex_counter)) { sctx->last_dirty_tex_counter = dirty_tex_counter; sctx->framebuffer.dirty_cbufs |= ((1 << sctx->framebuffer.state.nr_cbufs) - 1); sctx->framebuffer.dirty_zsbuf = true; si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); si_update_all_texture_descriptors(sctx); } + dirty_buf_counter = p_atomic_read(&sctx->screen->dirty_buf_counter); + if (unlikely(dirty_buf_counter != sctx->last_dirty_buf_counter)) { + sctx->last_dirty_buf_counter = dirty_buf_counter; + /* Rebind all buffers unconditionally. */ + si_rebind_buffer(sctx, NULL); + } + si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS)); /* Set the rasterization primitive type. * * This must be done after si_decompress_textures, which can call * draw_vbo recursively, and before si_update_shaders, which uses * current_rast_prim for this draw_vbo call. */ if (sctx->gs_shader.cso) rast_prim = sctx->gs_shader.cso->gs_output_prim; else if (sctx->tes_shader.cso) { -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev