Hi Marek, it seems that this patch is causing a few issues [1]. Any idea what is going on? Maybe it would be best to revert the patch for now?
Best, Gert [1] https://bugzilla.freedesktop.org/show_bug.cgi?id=110701 On Fr, 2019-05-10 at 01:19 -0400, Marek Olšák wrote: > From: Marek Olšák <marek.ol...@amd.com> > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108824 > > Cc: 19.1 <mesa-sta...@lists.freedesktop.org> > --- > src/gallium/drivers/radeonsi/si_descriptors.c | 94 ++++++++++++----- > -- > src/gallium/drivers/radeonsi/si_pipe.h | 2 + > src/gallium/drivers/radeonsi/si_state_draw.c | 9 +- > 3 files changed, 72 insertions(+), 33 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c > b/src/gallium/drivers/radeonsi/si_descriptors.c > index 744fc9a15d7..6a4dcacc0f3 100644 > --- a/src/gallium/drivers/radeonsi/si_descriptors.c > +++ b/src/gallium/drivers/radeonsi/si_descriptors.c > @@ -1580,242 +1580,272 @@ void > si_update_needs_color_decompress_masks(struct si_context *sctx) > si_samplers_update_needs_color_decompress_mask(&sctx- > >samplers[i]); > si_images_update_needs_color_decompress_mask(&sctx- > >images[i]); > si_update_shader_needs_decompress_mask(sctx, i); > } > > si_resident_handles_update_needs_color_decompress(sctx); > } > > /* BUFFER DISCARD/INVALIDATION */ > > -/** Reset descriptors of buffer resources after \p buf has been > invalidated. */ > +/* Reset descriptors of buffer resources after \p buf has been > invalidated. > + * If buf == NULL, reset all descriptors. 
> + */ > static void si_reset_buffer_resources(struct si_context *sctx, > struct si_buffer_resources > *buffers, > unsigned descriptors_idx, > unsigned slot_mask, > struct pipe_resource *buf, > enum radeon_bo_priority priority) > { > struct si_descriptors *descs = &sctx- > >descriptors[descriptors_idx]; > unsigned mask = buffers->enabled_mask & slot_mask; > > while (mask) { > unsigned i = u_bit_scan(&mask); > - if (buffers->buffers[i] == buf) { > - si_set_buf_desc_address(si_resource(buf), > buffers->offsets[i], > + struct pipe_resource *buffer = buffers->buffers[i]; > + > + if (buffer && (!buf || buffer == buf)) { > + si_set_buf_desc_address(si_resource(buffer), > buffers->offsets[i], > descs->list + i*4); > sctx->descriptors_dirty |= 1u << > descriptors_idx; > > radeon_add_to_gfx_buffer_list_check_mem(sctx, > - si_reso > urce(buf), > + si_reso > urce(buffer), > buffers > ->writable_mask & (1u << i) ? > > RADEON_USAGE_READWRITE : > > RADEON_USAGE_READ, > priorit > y, true); > } > } > } > > -/* Update all resource bindings where the buffer is bound, including > +/* Update all buffer bindings where the buffer is bound, including > * all resource descriptors. This is invalidate_buffer without > - * the invalidation. */ > + * the invalidation. > + * > + * If buf == NULL, update all buffer bindings. > + */ > void si_rebind_buffer(struct si_context *sctx, struct pipe_resource > *buf) > { > struct si_resource *buffer = si_resource(buf); > unsigned i, shader; > unsigned num_elems = sctx->vertex_elements ? > sctx->vertex_elements->count : > 0; > > /* We changed the buffer, now we need to bind it where the old > one > * was bound. This consists of 2 things: > * 1) Updating the resource descriptor and dirtying it. > * 2) Adding a relocation to the CS, so that it's usable. > */ > > /* Vertex buffers. 
*/ > - if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) { > + if (!buffer) { > + if (num_elems) > + sctx->vertex_buffers_dirty = true; > + } else if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) { > for (i = 0; i < num_elems; i++) { > int vb = sctx->vertex_elements- > >vertex_buffer_index[i]; > > if (vb >= ARRAY_SIZE(sctx->vertex_buffer)) > continue; > if (!sctx->vertex_buffer[vb].buffer.resource) > continue; > > if (sctx->vertex_buffer[vb].buffer.resource == > buf) { > sctx->vertex_buffers_dirty = true; > break; > } > } > } > > /* Streamout buffers. (other internal buffers can't be > invalidated) */ > - if (buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) { > + if (!buffer || buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) > { > for (i = SI_VS_STREAMOUT_BUF0; i <= > SI_VS_STREAMOUT_BUF3; i++) { > struct si_buffer_resources *buffers = &sctx- > >rw_buffers; > struct si_descriptors *descs = > &sctx- > >descriptors[SI_DESCS_RW_BUFFERS]; > + struct pipe_resource *buffer = buffers- > >buffers[i]; > > - if (buffers->buffers[i] != buf) > + if (!buffer || (buf && buffer != buf)) > continue; > > - si_set_buf_desc_address(si_resource(buf), > buffers->offsets[i], > + si_set_buf_desc_address(si_resource(buffer), > buffers->offsets[i], > descs->list + i*4); > sctx->descriptors_dirty |= 1u << > SI_DESCS_RW_BUFFERS; > > radeon_add_to_gfx_buffer_list_check_mem(sctx, > - buffer, > RADEON_USAGE_WRITE, > + si_reso > urce(buffer), > + RADEON_ > USAGE_WRITE, > RADEON_ > PRIO_SHADER_RW_BUFFER, > true); > > /* Update the streamout state. */ > if (sctx->streamout.begin_emitted) > si_emit_streamout_end(sctx); > sctx->streamout.append_bitmask = > sctx->streamout.enabled_mask; > si_streamout_buffers_dirty(sctx); > } > } > > /* Constant and shader buffers. 
*/ > - if (buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) { > + if (!buffer || buffer->bind_history & > PIPE_BIND_CONSTANT_BUFFER) { > for (shader = 0; shader < SI_NUM_SHADERS; shader++) > si_reset_buffer_resources(sctx, &sctx- > >const_and_shader_buffers[shader], > si_const_and_shader_b > uffer_descriptors_idx(shader), > u_bit_consecutive(SI_ > NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS), > buf, > sctx- > >const_and_shader_buffers[shader].priority_constbuf); > } > > - if (buffer->bind_history & PIPE_BIND_SHADER_BUFFER) { > + if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_BUFFER) > { > for (shader = 0; shader < SI_NUM_SHADERS; shader++) > si_reset_buffer_resources(sctx, &sctx- > >const_and_shader_buffers[shader], > si_const_and_shader_b > uffer_descriptors_idx(shader), > u_bit_consecutive(0, > SI_NUM_SHADER_BUFFERS), > buf, > sctx- > >const_and_shader_buffers[shader].priority); > } > > - if (buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { > + if (!buffer || buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { > /* Texture buffers - update bindings. 
*/ > for (shader = 0; shader < SI_NUM_SHADERS; shader++) { > struct si_samplers *samplers = &sctx- > >samplers[shader]; > struct si_descriptors *descs = > si_sampler_and_image_descriptors(sctx, > shader); > unsigned mask = samplers->enabled_mask; > > while (mask) { > unsigned i = u_bit_scan(&mask); > - if (samplers->views[i]->texture == buf) > { > + struct pipe_resource *buffer = > samplers->views[i]->texture; > + > + if (buffer && (!buf || buffer == buf)) > { > unsigned desc_slot = > si_get_sampler_slot(i); > > - si_set_buf_desc_address(si_reso > urce(buf), > + si_set_buf_desc_address(si_reso > urce(buffer), > sampler > s->views[i]->u.buf.offset, > descs- > >list + desc_slot * 16 + 4); > sctx->descriptors_dirty |= > 1u << > si_sampler_and_image_descriptors_idx(shader); > > - radeon_add_to_gfx_buffer_list_c > heck_mem(sctx, > - > buffer, RADEON_USAGE_READ, > - > RADEON_PRIO_SAMPLER_BUFFER, > - > true); > + radeon_add_to_gfx_buffer_list_c > heck_mem( > + sctx, > si_resource(buffer), > + RADEON_USAGE_READ, > + RADEON_PRIO_SAMPLER_BUF > FER, true); > } > } > } > } > > /* Shader images */ > - if (buffer->bind_history & PIPE_BIND_SHADER_IMAGE) { > + if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_IMAGE) { > for (shader = 0; shader < SI_NUM_SHADERS; ++shader) { > struct si_images *images = &sctx- > >images[shader]; > struct si_descriptors *descs = > si_sampler_and_image_descriptors(sctx, > shader); > unsigned mask = images->enabled_mask; > > while (mask) { > unsigned i = u_bit_scan(&mask); > + struct pipe_resource *buffer = images- > >views[i].resource; > > - if (images->views[i].resource == buf) { > + if (buffer && (!buf || buffer == buf)) > { > unsigned desc_slot = > si_get_image_slot(i); > > if (images->views[i].access & > PIPE_IMAGE_ACCESS_WRITE) > si_mark_image_range_val > id(&images->views[i]); > > - si_set_buf_desc_address(si_reso > urce(buf), > + si_set_buf_desc_address(si_reso > urce(buffer), > images- > >views[i].u.buf.offset, > descs- > >list + 
desc_slot * 8 + 4); > sctx->descriptors_dirty |= > 1u << > si_sampler_and_image_descriptors_idx(shader); > > radeon_add_to_gfx_buffer_list_c > heck_mem( > - sctx, buffer, > + sctx, > si_resource(buffer), > RADEON_USAGE_READWRITE, > RADEON_PRIO_SAMPLER_BUF > FER, true); > } > } > } > } > > /* Bindless texture handles */ > - if (buffer->texture_handle_allocated) { > + if (!buffer || buffer->texture_handle_allocated) { > struct si_descriptors *descs = &sctx- > >bindless_descriptors; > > util_dynarray_foreach(&sctx->resident_tex_handles, > struct si_texture_handle *, > tex_handle) { > struct pipe_sampler_view *view = (*tex_handle)- > >view; > unsigned desc_slot = (*tex_handle)->desc_slot; > + struct pipe_resource *buffer = view->texture; > > - if (view->texture == buf) { > - si_set_buf_desc_address(buffer, > + if (buffer && (!buf || buffer == buf)) { > + si_set_buf_desc_address(si_resource(buf > fer), > view- > >u.buf.offset, > descs->list + > desc_slot * 16 > + 4); > > (*tex_handle)->desc_dirty = true; > sctx->bindless_descriptors_dirty = > true; > > radeon_add_to_gfx_buffer_list_check_mem > ( > - sctx, buffer, > + sctx, si_resource(buffer), > RADEON_USAGE_READ, > RADEON_PRIO_SAMPLER_BUFFER, > true); > } > } > } > > /* Bindless image handles */ > - if (buffer->image_handle_allocated) { > + if (!buffer || buffer->image_handle_allocated) { > struct si_descriptors *descs = &sctx- > >bindless_descriptors; > > util_dynarray_foreach(&sctx->resident_img_handles, > struct si_image_handle *, > img_handle) { > struct pipe_image_view *view = &(*img_handle)- > >view; > unsigned desc_slot = (*img_handle)->desc_slot; > + struct pipe_resource *buffer = view->resource; > > - if (view->resource == buf) { > + if (buffer && (!buf || buffer == buf)) { > if (view->access & > PIPE_IMAGE_ACCESS_WRITE) > si_mark_image_range_valid(view) > ; > > - si_set_buf_desc_address(buffer, > + si_set_buf_desc_address(si_resource(buf > fer), > view- > >u.buf.offset, > descs->list + > desc_slot * 16 > + 
4); > > (*img_handle)->desc_dirty = true; > sctx->bindless_descriptors_dirty = > true; > > radeon_add_to_gfx_buffer_list_check_mem > ( > - sctx, buffer, > + sctx, si_resource(buffer), > RADEON_USAGE_READWRITE, > RADEON_PRIO_SAMPLER_BUFFER, > true); > } > } > } > + > + if (buffer) { > + /* Do the same for other contexts. They will invoke > this function > + * with buffer == NULL. > + */ > + unsigned new_counter = p_atomic_inc_return(&sctx- > >screen->dirty_buf_counter); > + > + /* Skip the update for the current context, because we > have already updated > + * the buffer bindings. > + */ > + if (new_counter == sctx->last_dirty_buf_counter + 1) > + sctx->last_dirty_buf_counter = new_counter; > + } > } > > static void si_upload_bindless_descriptor(struct si_context *sctx, > unsigned desc_slot, > unsigned num_dwords) > { > struct si_descriptors *desc = &sctx->bindless_descriptors; > unsigned desc_slot_offset = desc_slot * 16; > uint32_t *data; > uint64_t va; > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h > b/src/gallium/drivers/radeonsi/si_pipe.h > index d3ddb912245..949fa0755cb 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.h > +++ b/src/gallium/drivers/radeonsi/si_pipe.h > @@ -519,20 +519,21 @@ struct si_screen { > struct si_perfcounters *perfcounters; > > /* If pipe_screen wants to recompute and re-emit the > framebuffer, > * sampler, and image states of all contexts, it should > atomically > * increment this. > * > * Each context will compare this with its own last known value > of > * the counter before drawing and re-emit the states > accordingly. > */ > unsigned dirty_tex_counter; > + unsigned dirty_buf_counter; > > /* Atomically increment this counter when an existing texture's > * metadata is enabled or disabled in a way that requires > changing > * contexts' compressed texture binding masks. 
> */ > unsigned compressed_colortex_counter; > > struct { > /* Context flags to set so that all writes from earlier > jobs > * in the CP are seen by L2 clients. > @@ -845,20 +846,21 @@ struct si_context { > > bool has_graphics; > bool gfx_flush_in_progress:1; > bool gfx_last_ib_is_busy:1; > bool compute_is_busy:1; > > unsigned num_gfx_cs_flushes; > unsigned initial_gfx_cs_size; > unsigned gpu_reset_counter; > unsigned last_dirty_tex_counter; > + unsigned last_dirty_buf_counter; > unsigned last_compressed_colortex_counter; > unsigned last_num_draw_calls; > unsigned flags; /* flush flags */ > /* Current unaccounted memory usage. */ > uint64_t vram; > uint64_t gtt; > > /* Atoms (direct states). */ > union si_state_atoms atoms; > unsigned dirty_atoms; /* mask */ > diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c > b/src/gallium/drivers/radeonsi/si_state_draw.c > index 8e01e1b35e1..d9dfef0a381 100644 > --- a/src/gallium/drivers/radeonsi/si_state_draw.c > +++ b/src/gallium/drivers/radeonsi/si_state_draw.c > @@ -1247,21 +1247,21 @@ static void si_emit_all_states(struct > si_context *sctx, const struct pipe_draw_i > /* Emit draw states. */ > si_emit_vs_state(sctx, info); > si_emit_draw_registers(sctx, info, num_patches); > } > > static void si_draw_vbo(struct pipe_context *ctx, const struct > pipe_draw_info *info) > { > struct si_context *sctx = (struct si_context *)ctx; > struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; > struct pipe_resource *indexbuf = info->index.resource; > - unsigned dirty_tex_counter; > + unsigned dirty_tex_counter, dirty_buf_counter; > enum pipe_prim_type rast_prim; > unsigned index_size = info->index_size; > unsigned index_offset = info->indirect ? info->start * > index_size : 0; > > if (likely(!info->indirect)) { > /* SI-CI treat instance_count==0 as instance_count==1. > There is > * no workaround for indirect draws, but we can at > least skip > * direct draws. 
> */ > if (unlikely(!info->instance_count)) > @@ -1285,20 +1285,27 @@ static void si_draw_vbo(struct pipe_context > *ctx, const struct pipe_draw_info *i > dirty_tex_counter = p_atomic_read(&sctx->screen- > >dirty_tex_counter); > if (unlikely(dirty_tex_counter != sctx- > >last_dirty_tex_counter)) { > sctx->last_dirty_tex_counter = dirty_tex_counter; > sctx->framebuffer.dirty_cbufs |= > ((1 << sctx->framebuffer.state.nr_cbufs) - 1); > sctx->framebuffer.dirty_zsbuf = true; > si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); > si_update_all_texture_descriptors(sctx); > } > > + dirty_buf_counter = p_atomic_read(&sctx->screen- > >dirty_buf_counter); > + if (unlikely(dirty_buf_counter != sctx- > >last_dirty_buf_counter)) { > + sctx->last_dirty_buf_counter = dirty_buf_counter; > + /* Rebind all buffers unconditionally. */ > + si_rebind_buffer(sctx, NULL); > + } > + > si_decompress_textures(sctx, u_bit_consecutive(0, > SI_NUM_GRAPHICS_SHADERS)); > > /* Set the rasterization primitive type. > * > * This must be done after si_decompress_textures, which can > call > * draw_vbo recursively, and before si_update_shaders, which > uses > * current_rast_prim for this draw_vbo call. */ > if (sctx->gs_shader.cso) > rast_prim = sctx->gs_shader.cso->gs_output_prim; > else if (sctx->tes_shader.cso) { _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev