Thanks. It looks good.

Marek
On Fri, May 17, 2019 at 3:56 AM Juan A. Suarez Romero <jasua...@igalia.com> wrote:
> On Fri, 2019-05-10 at 01:19 -0400, Marek Olšák wrote:
> > From: Marek Olšák <marek.ol...@amd.com>
> >
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108824
> >
> > Cc: 19.1 <mesa-sta...@lists.freedesktop.org>
>
> Hi, Marek,
>
> This patch didn't apply cleanly on the 19.1 branch, so I've fixed the
> conflicts.
>
> You can find the resolved patch in
>
> https://gitlab.freedesktop.org/mesa/mesa/commit/21620c889e3fc78be13f096fcccca273cfd27a3d
>
> Thanks!
>
>         J.A.
>
> > ---
> >  src/gallium/drivers/radeonsi/si_descriptors.c | 94 ++++++++++++-------
> >  src/gallium/drivers/radeonsi/si_pipe.h        |  2 +
> >  src/gallium/drivers/radeonsi/si_state_draw.c  |  9 +-
> >  3 files changed, 72 insertions(+), 33 deletions(-)
> >
> > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> > index 744fc9a15d7..6a4dcacc0f3 100644
> > --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> > @@ -1580,242 +1580,272 @@ void si_update_needs_color_decompress_masks(struct si_context *sctx)
> >          si_samplers_update_needs_color_decompress_mask(&sctx->samplers[i]);
> >          si_images_update_needs_color_decompress_mask(&sctx->images[i]);
> >          si_update_shader_needs_decompress_mask(sctx, i);
> >      }
> >
> >      si_resident_handles_update_needs_color_decompress(sctx);
> >  }
> >
> >  /* BUFFER DISCARD/INVALIDATION */
> >
> > -/** Reset descriptors of buffer resources after \p buf has been invalidated. */
> > +/* Reset descriptors of buffer resources after \p buf has been invalidated.
> > + * If buf == NULL, reset all descriptors.
> > + */
> >  static void si_reset_buffer_resources(struct si_context *sctx,
> >                                        struct si_buffer_resources *buffers,
> >                                        unsigned descriptors_idx,
> >                                        unsigned slot_mask,
> >                                        struct pipe_resource *buf,
> >                                        enum radeon_bo_priority priority)
> >  {
> >      struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
> >      unsigned mask = buffers->enabled_mask & slot_mask;
> >
> >      while (mask) {
> >          unsigned i = u_bit_scan(&mask);
> > -        if (buffers->buffers[i] == buf) {
> > -            si_set_buf_desc_address(si_resource(buf), buffers->offsets[i],
> > +        struct pipe_resource *buffer = buffers->buffers[i];
> > +
> > +        if (buffer && (!buf || buffer == buf)) {
> > +            si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i],
> >                                      descs->list + i*4);
> >              sctx->descriptors_dirty |= 1u << descriptors_idx;
> >
> >              radeon_add_to_gfx_buffer_list_check_mem(sctx,
> > -                                                    si_resource(buf),
> > +                                                    si_resource(buffer),
> >                                                      buffers->writable_mask & (1u << i) ?
> >                                                          RADEON_USAGE_READWRITE :
> >                                                          RADEON_USAGE_READ,
> >                                                      priority, true);
> >          }
> >      }
> >  }
> >
> > -/* Update all resource bindings where the buffer is bound, including
> > +/* Update all buffer bindings where the buffer is bound, including
> >   * all resource descriptors. This is invalidate_buffer without
> > - * the invalidation. */
> > + * the invalidation.
> > + *
> > + * If buf == NULL, update all buffer bindings.
> > + */
> >  void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
> >  {
> >      struct si_resource *buffer = si_resource(buf);
> >      unsigned i, shader;
> >      unsigned num_elems = sctx->vertex_elements ?
> >                               sctx->vertex_elements->count : 0;
> >
> >      /* We changed the buffer, now we need to bind it where the old one
> >       * was bound. This consists of 2 things:
> >       *   1) Updating the resource descriptor and dirtying it.
> >       *   2) Adding a relocation to the CS, so that it's usable.
> >       */
> >
> >      /* Vertex buffers. */
> > -    if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
> > +    if (!buffer) {
> > +        if (num_elems)
> > +            sctx->vertex_buffers_dirty = true;
> > +    } else if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
> >          for (i = 0; i < num_elems; i++) {
> >              int vb = sctx->vertex_elements->vertex_buffer_index[i];
> >
> >              if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
> >                  continue;
> >              if (!sctx->vertex_buffer[vb].buffer.resource)
> >                  continue;
> >
> >              if (sctx->vertex_buffer[vb].buffer.resource == buf) {
> >                  sctx->vertex_buffers_dirty = true;
> >                  break;
> >              }
> >          }
> >      }
> >
> >      /* Streamout buffers. (other internal buffers can't be invalidated) */
> > -    if (buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
> > +    if (!buffer || buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
> >          for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
> >              struct si_buffer_resources *buffers = &sctx->rw_buffers;
> >              struct si_descriptors *descs =
> >                  &sctx->descriptors[SI_DESCS_RW_BUFFERS];
> > +            struct pipe_resource *buffer = buffers->buffers[i];
> >
> > -            if (buffers->buffers[i] != buf)
> > +            if (!buffer || (buf && buffer != buf))
> >                  continue;
> >
> > -            si_set_buf_desc_address(si_resource(buf), buffers->offsets[i],
> > +            si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i],
> >                                      descs->list + i*4);
> >              sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
> >
> >              radeon_add_to_gfx_buffer_list_check_mem(sctx,
> > -                                                    buffer, RADEON_USAGE_WRITE,
> > +                                                    si_resource(buffer),
> > +                                                    RADEON_USAGE_WRITE,
> >                                                      RADEON_PRIO_SHADER_RW_BUFFER,
> >                                                      true);
> >
> >              /* Update the streamout state. */
> >              if (sctx->streamout.begin_emitted)
> >                  si_emit_streamout_end(sctx);
> >              sctx->streamout.append_bitmask =
> >                      sctx->streamout.enabled_mask;
> >              si_streamout_buffers_dirty(sctx);
> >          }
> >      }
> >
> >      /* Constant and shader buffers. */
> > -    if (buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
> > +    if (!buffer || buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
> >          for (shader = 0; shader < SI_NUM_SHADERS; shader++)
> >              si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
> >                                        si_const_and_shader_buffer_descriptors_idx(shader),
> >                                        u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
> >                                        buf,
> >                                        sctx->const_and_shader_buffers[shader].priority_constbuf);
> >      }
> >
> > -    if (buffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
> > +    if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
> >          for (shader = 0; shader < SI_NUM_SHADERS; shader++)
> >              si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
> >                                        si_const_and_shader_buffer_descriptors_idx(shader),
> >                                        u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS),
> >                                        buf,
> >                                        sctx->const_and_shader_buffers[shader].priority);
> >      }
> >
> > -    if (buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
> > +    if (!buffer || buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
> >          /* Texture buffers - update bindings. */
> >          for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
> >              struct si_samplers *samplers = &sctx->samplers[shader];
> >              struct si_descriptors *descs =
> >                  si_sampler_and_image_descriptors(sctx, shader);
> >              unsigned mask = samplers->enabled_mask;
> >
> >              while (mask) {
> >                  unsigned i = u_bit_scan(&mask);
> > -                if (samplers->views[i]->texture == buf) {
> > +                struct pipe_resource *buffer = samplers->views[i]->texture;
> > +
> > +                if (buffer && (!buf || buffer == buf)) {
> >                      unsigned desc_slot = si_get_sampler_slot(i);
> >
> > -                    si_set_buf_desc_address(si_resource(buf),
> > +                    si_set_buf_desc_address(si_resource(buffer),
> >                                              samplers->views[i]->u.buf.offset,
> >                                              descs->list + desc_slot * 16 + 4);
> >                      sctx->descriptors_dirty |=
> >                          1u << si_sampler_and_image_descriptors_idx(shader);
> >
> > -                    radeon_add_to_gfx_buffer_list_check_mem(sctx,
> > -                                                            buffer, RADEON_USAGE_READ,
> > -                                                            RADEON_PRIO_SAMPLER_BUFFER,
> > -                                                            true);
> > +                    radeon_add_to_gfx_buffer_list_check_mem(
> > +                        sctx, si_resource(buffer),
> > +                        RADEON_USAGE_READ,
> > +                        RADEON_PRIO_SAMPLER_BUFFER, true);
> >                  }
> >              }
> >          }
> >      }
> >
> >      /* Shader images */
> > -    if (buffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
> > +    if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
> >          for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
> >              struct si_images *images = &sctx->images[shader];
> >              struct si_descriptors *descs =
> >                  si_sampler_and_image_descriptors(sctx, shader);
> >              unsigned mask = images->enabled_mask;
> >
> >              while (mask) {
> >                  unsigned i = u_bit_scan(&mask);
> > +                struct pipe_resource *buffer = images->views[i].resource;
> >
> > -                if (images->views[i].resource == buf) {
> > +                if (buffer && (!buf || buffer == buf)) {
> >                      unsigned desc_slot = si_get_image_slot(i);
> >
> >                      if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
> >                          si_mark_image_range_valid(&images->views[i]);
> >
> > -                    si_set_buf_desc_address(si_resource(buf),
> > +                    si_set_buf_desc_address(si_resource(buffer),
> >                                              images->views[i].u.buf.offset,
> >                                              descs->list + desc_slot * 8 + 4);
> >                      sctx->descriptors_dirty |=
> >                          1u << si_sampler_and_image_descriptors_idx(shader);
> >
> >                      radeon_add_to_gfx_buffer_list_check_mem(
> > -                        sctx, buffer,
> > +                        sctx, si_resource(buffer),
> >                          RADEON_USAGE_READWRITE,
> >                          RADEON_PRIO_SAMPLER_BUFFER, true);
> >                  }
> >              }
> >          }
> >      }
> >
> >      /* Bindless texture handles */
> > -    if (buffer->texture_handle_allocated) {
> > +    if (!buffer || buffer->texture_handle_allocated) {
> >          struct si_descriptors *descs = &sctx->bindless_descriptors;
> >
> >          util_dynarray_foreach(&sctx->resident_tex_handles,
> >                                struct si_texture_handle *, tex_handle) {
> >              struct pipe_sampler_view *view = (*tex_handle)->view;
> >              unsigned desc_slot = (*tex_handle)->desc_slot;
> > +            struct pipe_resource *buffer = view->texture;
> >
> > -            if (view->texture == buf) {
> > -                si_set_buf_desc_address(buffer,
> > +            if (buffer && (!buf || buffer == buf)) {
> > +                si_set_buf_desc_address(si_resource(buffer),
> >                                          view->u.buf.offset,
> >                                          descs->list +
> >                                          desc_slot * 16 + 4);
> >
> >                  (*tex_handle)->desc_dirty = true;
> >                  sctx->bindless_descriptors_dirty = true;
> >
> >                  radeon_add_to_gfx_buffer_list_check_mem(
> > -                    sctx, buffer,
> > +                    sctx, si_resource(buffer),
> >                      RADEON_USAGE_READ,
> >                      RADEON_PRIO_SAMPLER_BUFFER, true);
> >              }
> >          }
> >      }
> >
> >      /* Bindless image handles */
> > -    if (buffer->image_handle_allocated) {
> > +    if (!buffer || buffer->image_handle_allocated) {
> >          struct si_descriptors *descs = &sctx->bindless_descriptors;
> >
> >          util_dynarray_foreach(&sctx->resident_img_handles,
> >                                struct si_image_handle *, img_handle) {
> >              struct pipe_image_view *view = &(*img_handle)->view;
> >              unsigned desc_slot = (*img_handle)->desc_slot;
> > +            struct pipe_resource *buffer = view->resource;
> >
> > -            if (view->resource == buf) {
> > +            if (buffer && (!buf || buffer == buf)) {
> >                  if (view->access & PIPE_IMAGE_ACCESS_WRITE)
> >                      si_mark_image_range_valid(view);
> >
> > -                si_set_buf_desc_address(buffer,
> > +                si_set_buf_desc_address(si_resource(buffer),
> >                                          view->u.buf.offset,
> >                                          descs->list +
> >                                          desc_slot * 16 + 4);
> >
> >                  (*img_handle)->desc_dirty = true;
> >                  sctx->bindless_descriptors_dirty = true;
> >
> >                  radeon_add_to_gfx_buffer_list_check_mem(
> > -                    sctx, buffer,
> > +                    sctx, si_resource(buffer),
> >                      RADEON_USAGE_READWRITE,
> >                      RADEON_PRIO_SAMPLER_BUFFER, true);
> >              }
> >          }
> >      }
> > +
> > +    if (buffer) {
> > +        /* Do the same for other contexts. They will invoke this function
> > +         * with buffer == NULL.
> > +         */
> > +        unsigned new_counter = p_atomic_inc_return(&sctx->screen->dirty_buf_counter);
> > +
> > +        /* Skip the update for the current context, because we have already updated
> > +         * the buffer bindings.
> > +         */
> > +        if (new_counter == sctx->last_dirty_buf_counter + 1)
> > +            sctx->last_dirty_buf_counter = new_counter;
> > +    }
> >  }
> >
> >  static void si_upload_bindless_descriptor(struct si_context *sctx,
> >                                            unsigned desc_slot,
> >                                            unsigned num_dwords)
> >  {
> >      struct si_descriptors *desc = &sctx->bindless_descriptors;
> >      unsigned desc_slot_offset = desc_slot * 16;
> >      uint32_t *data;
> >      uint64_t va;
> > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> > index d3ddb912245..949fa0755cb 100644
> > --- a/src/gallium/drivers/radeonsi/si_pipe.h
> > +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> > @@ -519,20 +519,21 @@ struct si_screen {
> >      struct si_perfcounters     *perfcounters;
> >
> >      /* If pipe_screen wants to recompute and re-emit the framebuffer,
> >       * sampler, and image states of all contexts, it should atomically
> >       * increment this.
> >       *
> >       * Each context will compare this with its own last known value of
> >       * the counter before drawing and re-emit the states accordingly.
> >       */
> >      unsigned                   dirty_tex_counter;
> > +    unsigned                   dirty_buf_counter;
> >
> >      /* Atomically increment this counter when an existing texture's
> >       * metadata is enabled or disabled in a way that requires changing
> >       * contexts' compressed texture binding masks.
> >       */
> >      unsigned                   compressed_colortex_counter;
> >
> >      struct {
> >          /* Context flags to set so that all writes from earlier jobs
> >           * in the CP are seen by L2 clients.
> > @@ -845,20 +846,21 @@ struct si_context {
> >
> >      bool                       has_graphics;
> >      bool                       gfx_flush_in_progress:1;
> >      bool                       gfx_last_ib_is_busy:1;
> >      bool                       compute_is_busy:1;
> >
> >      unsigned                   num_gfx_cs_flushes;
> >      unsigned                   initial_gfx_cs_size;
> >      unsigned                   gpu_reset_counter;
> >      unsigned                   last_dirty_tex_counter;
> > +    unsigned                   last_dirty_buf_counter;
> >      unsigned                   last_compressed_colortex_counter;
> >      unsigned                   last_num_draw_calls;
> >      unsigned                   flags; /* flush flags */
> >      /* Current unaccounted memory usage. */
> >      uint64_t                   vram;
> >      uint64_t                   gtt;
> >
> >      /* Atoms (direct states). */
> >      union si_state_atoms       atoms;
> >      unsigned                   dirty_atoms; /* mask */
> > diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
> > index 8e01e1b35e1..d9dfef0a381 100644
> > --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> > +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> > @@ -1247,21 +1247,21 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i
> >      /* Emit draw states. */
> >      si_emit_vs_state(sctx, info);
> >      si_emit_draw_registers(sctx, info, num_patches);
> >  }
> >
> >  static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
> >  {
> >      struct si_context *sctx = (struct si_context *)ctx;
> >      struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
> >      struct pipe_resource *indexbuf = info->index.resource;
> > -    unsigned dirty_tex_counter;
> > +    unsigned dirty_tex_counter, dirty_buf_counter;
> >      enum pipe_prim_type rast_prim;
> >      unsigned index_size = info->index_size;
> >      unsigned index_offset = info->indirect ? info->start * index_size : 0;
> >
> >      if (likely(!info->indirect)) {
> >          /* SI-CI treat instance_count==0 as instance_count==1. There is
> >           * no workaround for indirect draws, but we can at least skip
> >           * direct draws.
> >           */
> >          if (unlikely(!info->instance_count))
> > @@ -1285,20 +1285,27 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
> >      dirty_tex_counter = p_atomic_read(&sctx->screen->dirty_tex_counter);
> >      if (unlikely(dirty_tex_counter != sctx->last_dirty_tex_counter)) {
> >          sctx->last_dirty_tex_counter = dirty_tex_counter;
> >          sctx->framebuffer.dirty_cbufs |=
> >              ((1 << sctx->framebuffer.state.nr_cbufs) - 1);
> >          sctx->framebuffer.dirty_zsbuf = true;
> >          si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
> >          si_update_all_texture_descriptors(sctx);
> >      }
> >
> > +    dirty_buf_counter = p_atomic_read(&sctx->screen->dirty_buf_counter);
> > +    if (unlikely(dirty_buf_counter != sctx->last_dirty_buf_counter)) {
> > +        sctx->last_dirty_buf_counter = dirty_buf_counter;
> > +        /* Rebind all buffers unconditionally. */
> > +        si_rebind_buffer(sctx, NULL);
> > +    }
> > +
> >      si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS));
> >
> >      /* Set the rasterization primitive type.
> >       *
> >       * This must be done after si_decompress_textures, which can call
> >       * draw_vbo recursively, and before si_update_shaders, which uses
> >       * current_rast_prim for this draw_vbo call. */
> >      if (sctx->gs_shader.cso)
> >          rast_prim = sctx->gs_shader.cso->gs_output_prim;
> >      else if (sctx->tes_shader.cso) {
> >
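For anyone reviewing or backporting this, the core of the fix is a screen-wide generation counter: whichever context invalidates a buffer atomically bumps screen->dirty_buf_counter, and every other context compares that counter against its own last-seen value at the top of si_draw_vbo and, on a mismatch, rebinds all buffers via si_rebind_buffer(sctx, NULL). Below is a minimal standalone C11 sketch of that pattern, not radeonsi code: the screen/context structs and function names are illustrative only, and plain <stdatomic.h> stands in for Mesa's p_atomic_* wrappers from src/util/u_atomic.h.

#include <stdatomic.h>

struct screen {
        /* Shared by all contexts; bumped whenever any context invalidates
         * a buffer that other contexts may still have bound. */
        atomic_uint dirty_buf_counter;
};

struct context {
        struct screen *screen;
        /* Last generation this context has already processed. */
        unsigned last_dirty_buf_counter;
};

/* Called by the context that invalidated a buffer (illustrative name). */
static void notify_other_contexts(struct context *ctx)
{
        /* atomic_fetch_add returns the previous value, so +1 is the new one. */
        unsigned new_counter =
                atomic_fetch_add(&ctx->screen->dirty_buf_counter, 1) + 1;

        /* This context already rebound its own buffers, so it may skip the
         * rebind-all on its next draw, but only if no other context bumped
         * the counter in the meantime; otherwise leave our value stale and
         * do the conservative rebind-all ourselves. */
        if (new_counter == ctx->last_dirty_buf_counter + 1)
                ctx->last_dirty_buf_counter = new_counter;
}

/* Called at the top of every draw by every context (illustrative name). */
static void check_dirty_buffers(struct context *ctx,
                                void (*rebind_all)(struct context *))
{
        unsigned counter = atomic_load(&ctx->screen->dirty_buf_counter);

        if (counter != ctx->last_dirty_buf_counter) {
                ctx->last_dirty_buf_counter = counter;
                rebind_all(ctx); /* analogous to si_rebind_buffer(sctx, NULL) */
        }
}

The new_counter == last + 1 test is the subtle part of the patch: it is an optimization that lets the invalidating context skip its own rebind-all, without ever letting a racing increment from another context go unnoticed.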