Hi Marek, 

it seems that this patch is causing a few issues [1], any idea what is
going on? Maybe it is best to revert the patch for now? 

Best, 
Gert 


[1] https://bugzilla.freedesktop.org/show_bug.cgi?id=110701

On Fr, 2019-05-10 at 01:19 -0400, Marek Olšák wrote:
> From: Marek Olšák <marek.ol...@amd.com>
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108824
> 
> Cc: 19.1 <mesa-sta...@lists.freedesktop.org>
> ---
>  src/gallium/drivers/radeonsi/si_descriptors.c | 94 ++++++++++++-----
> --
>  src/gallium/drivers/radeonsi/si_pipe.h        |  2 +
>  src/gallium/drivers/radeonsi/si_state_draw.c  |  9 +-
>  3 files changed, 72 insertions(+), 33 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c
> b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 744fc9a15d7..6a4dcacc0f3 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -1580,242 +1580,272 @@ void
> si_update_needs_color_decompress_masks(struct si_context *sctx)
>               si_samplers_update_needs_color_decompress_mask(&sctx-
> >samplers[i]);
>               si_images_update_needs_color_decompress_mask(&sctx-
> >images[i]);
>               si_update_shader_needs_decompress_mask(sctx, i);
>       }
>  
>       si_resident_handles_update_needs_color_decompress(sctx);
>  }
>  
>  /* BUFFER DISCARD/INVALIDATION */
>  
> -/** Reset descriptors of buffer resources after \p buf has been
> invalidated. */
> +/* Reset descriptors of buffer resources after \p buf has been
> invalidated.
> + * If buf == NULL, reset all descriptors.
> + */
>  static void si_reset_buffer_resources(struct si_context *sctx,
>                                     struct si_buffer_resources
> *buffers,
>                                     unsigned descriptors_idx,
>                                     unsigned slot_mask,
>                                     struct pipe_resource *buf,
>                                     enum radeon_bo_priority priority)
>  {
>       struct si_descriptors *descs = &sctx-
> >descriptors[descriptors_idx];
>       unsigned mask = buffers->enabled_mask & slot_mask;
>  
>       while (mask) {
>               unsigned i = u_bit_scan(&mask);
> -             if (buffers->buffers[i] == buf) {
> -                     si_set_buf_desc_address(si_resource(buf),
> buffers->offsets[i],
> +             struct pipe_resource *buffer = buffers->buffers[i];
> +
> +             if (buffer && (!buf || buffer == buf)) {
> +                     si_set_buf_desc_address(si_resource(buffer),
> buffers->offsets[i],
>                                               descs->list + i*4);
>                       sctx->descriptors_dirty |= 1u <<
> descriptors_idx;
>  
>                       radeon_add_to_gfx_buffer_list_check_mem(sctx,
> -                                                             si_reso
> urce(buf),
> +                                                             si_reso
> urce(buffer),
>                                                               buffers
> ->writable_mask & (1u << i) ?
>                                                                       
> RADEON_USAGE_READWRITE :
>                                                                       
> RADEON_USAGE_READ,
>                                                               priorit
> y, true);
>               }
>       }
>  }
>  
> -/* Update all resource bindings where the buffer is bound, including
> +/* Update all buffer bindings where the buffer is bound, including
>   * all resource descriptors. This is invalidate_buffer without
> - * the invalidation. */
> + * the invalidation.
> + *
> + * If buf == NULL, update all buffer bindings.
> + */
>  void si_rebind_buffer(struct si_context *sctx, struct pipe_resource
> *buf)
>  {
>       struct si_resource *buffer = si_resource(buf);
>       unsigned i, shader;
>       unsigned num_elems = sctx->vertex_elements ?
>                                      sctx->vertex_elements->count :
> 0;
>  
>       /* We changed the buffer, now we need to bind it where the old
> one
>        * was bound. This consists of 2 things:
>        *   1) Updating the resource descriptor and dirtying it.
>        *   2) Adding a relocation to the CS, so that it's usable.
>        */
>  
>       /* Vertex buffers. */
> -     if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
> +     if (!buffer) {
> +             if (num_elems)
> +                     sctx->vertex_buffers_dirty = true;
> +     } else if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
>               for (i = 0; i < num_elems; i++) {
>                       int vb = sctx->vertex_elements-
> >vertex_buffer_index[i];
>  
>                       if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
>                               continue;
>                       if (!sctx->vertex_buffer[vb].buffer.resource)
>                               continue;
>  
>                       if (sctx->vertex_buffer[vb].buffer.resource ==
> buf) {
>                               sctx->vertex_buffers_dirty = true;
>                               break;
>                       }
>               }
>       }
>  
>       /* Streamout buffers. (other internal buffers can't be
> invalidated) */
> -     if (buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
> +     if (!buffer || buffer->bind_history & PIPE_BIND_STREAM_OUTPUT)
> {
>               for (i = SI_VS_STREAMOUT_BUF0; i <=
> SI_VS_STREAMOUT_BUF3; i++) {
>                       struct si_buffer_resources *buffers = &sctx-
> >rw_buffers;
>                       struct si_descriptors *descs =
>                               &sctx-
> >descriptors[SI_DESCS_RW_BUFFERS];
> +                     struct pipe_resource *buffer = buffers-
> >buffers[i];
>  
> -                     if (buffers->buffers[i] != buf)
> +                     if (!buffer || (buf && buffer != buf))
>                               continue;
>  
> -                     si_set_buf_desc_address(si_resource(buf),
> buffers->offsets[i],
> +                     si_set_buf_desc_address(si_resource(buffer),
> buffers->offsets[i],
>                                               descs->list + i*4);
>                       sctx->descriptors_dirty |= 1u <<
> SI_DESCS_RW_BUFFERS;
>  
>                       radeon_add_to_gfx_buffer_list_check_mem(sctx,
> -                                                             buffer,
> RADEON_USAGE_WRITE,
> +                                                             si_reso
> urce(buffer),
> +                                                             RADEON_
> USAGE_WRITE,
>                                                               RADEON_
> PRIO_SHADER_RW_BUFFER,
>                                                               true);
>  
>                       /* Update the streamout state. */
>                       if (sctx->streamout.begin_emitted)
>                               si_emit_streamout_end(sctx);
>                       sctx->streamout.append_bitmask =
>                                       sctx->streamout.enabled_mask;
>                       si_streamout_buffers_dirty(sctx);
>               }
>       }
>  
>       /* Constant and shader buffers. */
> -     if (buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
> +     if (!buffer || buffer->bind_history &
> PIPE_BIND_CONSTANT_BUFFER) {
>               for (shader = 0; shader < SI_NUM_SHADERS; shader++)
>                       si_reset_buffer_resources(sctx, &sctx-
> >const_and_shader_buffers[shader],
>                                                 si_const_and_shader_b
> uffer_descriptors_idx(shader),
>                                                 u_bit_consecutive(SI_
> NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
>                                                 buf,
>                                                 sctx-
> >const_and_shader_buffers[shader].priority_constbuf);
>       }
>  
> -     if (buffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
> +     if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_BUFFER)
> {
>               for (shader = 0; shader < SI_NUM_SHADERS; shader++)
>                       si_reset_buffer_resources(sctx, &sctx-
> >const_and_shader_buffers[shader],
>                                                 si_const_and_shader_b
> uffer_descriptors_idx(shader),
>                                                 u_bit_consecutive(0,
> SI_NUM_SHADER_BUFFERS),
>                                                 buf,
>                                                 sctx-
> >const_and_shader_buffers[shader].priority);
>       }
>  
> -     if (buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
> +     if (!buffer || buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
>               /* Texture buffers - update bindings. */
>               for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
>                       struct si_samplers *samplers = &sctx-
> >samplers[shader];
>                       struct si_descriptors *descs =
>                               si_sampler_and_image_descriptors(sctx,
> shader);
>                       unsigned mask = samplers->enabled_mask;
>  
>                       while (mask) {
>                               unsigned i = u_bit_scan(&mask);
> -                             if (samplers->views[i]->texture == buf)
> {
> +                             struct pipe_resource *buffer =
> samplers->views[i]->texture;
> +
> +                             if (buffer && (!buf || buffer == buf))
> {
>                                       unsigned desc_slot =
> si_get_sampler_slot(i);
>  
> -                                     si_set_buf_desc_address(si_reso
> urce(buf),
> +                                     si_set_buf_desc_address(si_reso
> urce(buffer),
>                                                               sampler
> s->views[i]->u.buf.offset,
>                                                               descs-
> >list + desc_slot * 16 + 4);
>                                       sctx->descriptors_dirty |=
>                                               1u <<
> si_sampler_and_image_descriptors_idx(shader);
>  
> -                                     radeon_add_to_gfx_buffer_list_c
> heck_mem(sctx,
> -                                                                     
>     buffer, RADEON_USAGE_READ,
> -                                                                     
>     RADEON_PRIO_SAMPLER_BUFFER,
> -                                                                     
>     true);
> +                                     radeon_add_to_gfx_buffer_list_c
> heck_mem(
> +                                             sctx,
> si_resource(buffer),
> +                                             RADEON_USAGE_READ,
> +                                             RADEON_PRIO_SAMPLER_BUF
> FER, true);
>                               }
>                       }
>               }
>       }
>  
>       /* Shader images */
> -     if (buffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
> +     if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
>               for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
>                       struct si_images *images = &sctx-
> >images[shader];
>                       struct si_descriptors *descs =
>                               si_sampler_and_image_descriptors(sctx,
> shader);
>                       unsigned mask = images->enabled_mask;
>  
>                       while (mask) {
>                               unsigned i = u_bit_scan(&mask);
> +                             struct pipe_resource *buffer = images-
> >views[i].resource;
>  
> -                             if (images->views[i].resource == buf) {
> +                             if (buffer && (!buf || buffer == buf))
> {
>                                       unsigned desc_slot =
> si_get_image_slot(i);
>  
>                                       if (images->views[i].access &
> PIPE_IMAGE_ACCESS_WRITE)
>                                               si_mark_image_range_val
> id(&images->views[i]);
>  
> -                                     si_set_buf_desc_address(si_reso
> urce(buf),
> +                                     si_set_buf_desc_address(si_reso
> urce(buffer),
>                                                               images-
> >views[i].u.buf.offset,
>                                                               descs-
> >list + desc_slot * 8 + 4);
>                                       sctx->descriptors_dirty |=
>                                               1u <<
> si_sampler_and_image_descriptors_idx(shader);
>  
>                                       radeon_add_to_gfx_buffer_list_c
> heck_mem(
> -                                             sctx, buffer,
> +                                             sctx,
> si_resource(buffer),
>                                               RADEON_USAGE_READWRITE,
>                                               RADEON_PRIO_SAMPLER_BUF
> FER, true);
>                               }
>                       }
>               }
>       }
>  
>       /* Bindless texture handles */
> -     if (buffer->texture_handle_allocated) {
> +     if (!buffer || buffer->texture_handle_allocated) {
>               struct si_descriptors *descs = &sctx-
> >bindless_descriptors;
>  
>               util_dynarray_foreach(&sctx->resident_tex_handles,
>                                     struct si_texture_handle *,
> tex_handle) {
>                       struct pipe_sampler_view *view = (*tex_handle)-
> >view;
>                       unsigned desc_slot = (*tex_handle)->desc_slot;
> +                     struct pipe_resource *buffer = view->texture;
>  
> -                     if (view->texture == buf) {
> -                             si_set_buf_desc_address(buffer,
> +                     if (buffer && (!buf || buffer == buf)) {
> +                             si_set_buf_desc_address(si_resource(buf
> fer),
>                                                       view-
> >u.buf.offset,
>                                                       descs->list +
>                                                       desc_slot * 16
> + 4);
>  
>                               (*tex_handle)->desc_dirty = true;
>                               sctx->bindless_descriptors_dirty =
> true;
>  
>                               radeon_add_to_gfx_buffer_list_check_mem
> (
> -                                     sctx, buffer,
> +                                     sctx, si_resource(buffer),
>                                       RADEON_USAGE_READ,
>                                       RADEON_PRIO_SAMPLER_BUFFER,
> true);
>                       }
>               }
>       }
>  
>       /* Bindless image handles */
> -     if (buffer->image_handle_allocated) {
> +     if (!buffer || buffer->image_handle_allocated) {
>               struct si_descriptors *descs = &sctx-
> >bindless_descriptors;
>  
>               util_dynarray_foreach(&sctx->resident_img_handles,
>                                     struct si_image_handle *,
> img_handle) {
>                       struct pipe_image_view *view = &(*img_handle)-
> >view;
>                       unsigned desc_slot = (*img_handle)->desc_slot;
> +                     struct pipe_resource *buffer = view->resource;
>  
> -                     if (view->resource == buf) {
> +                     if (buffer && (!buf || buffer == buf)) {
>                               if (view->access &
> PIPE_IMAGE_ACCESS_WRITE)
>                                       si_mark_image_range_valid(view)
> ;
>  
> -                             si_set_buf_desc_address(buffer,
> +                             si_set_buf_desc_address(si_resource(buf
> fer),
>                                                       view-
> >u.buf.offset,
>                                                       descs->list +
>                                                       desc_slot * 16
> + 4);
>  
>                               (*img_handle)->desc_dirty = true;
>                               sctx->bindless_descriptors_dirty =
> true;
>  
>                               radeon_add_to_gfx_buffer_list_check_mem
> (
> -                                     sctx, buffer,
> +                                     sctx, si_resource(buffer),
>                                       RADEON_USAGE_READWRITE,
>                                       RADEON_PRIO_SAMPLER_BUFFER,
> true);
>                       }
>               }
>       }
> +
> +     if (buffer) {
> +             /* Do the same for other contexts. They will invoke
> this function
> +              * with buffer == NULL.
> +              */
> +             unsigned new_counter = p_atomic_inc_return(&sctx-
> >screen->dirty_buf_counter);
> +
> +             /* Skip the update for the current context, because we
> have already updated
> +              * the buffer bindings.
> +              */
> +             if (new_counter == sctx->last_dirty_buf_counter + 1)
> +                     sctx->last_dirty_buf_counter = new_counter;
> +     }
>  }
>  
>  static void si_upload_bindless_descriptor(struct si_context *sctx,
>                                         unsigned desc_slot,
>                                         unsigned num_dwords)
>  {
>       struct si_descriptors *desc = &sctx->bindless_descriptors;
>       unsigned desc_slot_offset = desc_slot * 16;
>       uint32_t *data;
>       uint64_t va;
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
> b/src/gallium/drivers/radeonsi/si_pipe.h
> index d3ddb912245..949fa0755cb 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -519,20 +519,21 @@ struct si_screen {
>       struct si_perfcounters  *perfcounters;
>  
>       /* If pipe_screen wants to recompute and re-emit the
> framebuffer,
>        * sampler, and image states of all contexts, it should
> atomically
>        * increment this.
>        *
>        * Each context will compare this with its own last known value
> of
>        * the counter before drawing and re-emit the states
> accordingly.
>        */
>       unsigned                        dirty_tex_counter;
> +     unsigned                        dirty_buf_counter;
>  
>       /* Atomically increment this counter when an existing texture's
>        * metadata is enabled or disabled in a way that requires
> changing
>        * contexts' compressed texture binding masks.
>        */
>       unsigned                        compressed_colortex_counter;
>  
>       struct {
>               /* Context flags to set so that all writes from earlier
> jobs
>                * in the CP are seen by L2 clients.
> @@ -845,20 +846,21 @@ struct si_context {
>  
>       bool                            has_graphics;
>       bool                            gfx_flush_in_progress:1;
>       bool                            gfx_last_ib_is_busy:1;
>       bool                            compute_is_busy:1;
>  
>       unsigned                        num_gfx_cs_flushes;
>       unsigned                        initial_gfx_cs_size;
>       unsigned                        gpu_reset_counter;
>       unsigned                        last_dirty_tex_counter;
> +     unsigned                        last_dirty_buf_counter;
>       unsigned                        last_compressed_colortex_counter;
>       unsigned                        last_num_draw_calls;
>       unsigned                        flags; /* flush flags */
>       /* Current unaccounted memory usage. */
>       uint64_t                        vram;
>       uint64_t                        gtt;
>  
>       /* Atoms (direct states). */
>       union si_state_atoms            atoms;
>       unsigned                        dirty_atoms; /* mask */
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
> b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 8e01e1b35e1..d9dfef0a381 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -1247,21 +1247,21 @@ static void si_emit_all_states(struct
> si_context *sctx, const struct pipe_draw_i
>       /* Emit draw states. */
>       si_emit_vs_state(sctx, info);
>       si_emit_draw_registers(sctx, info, num_patches);
>  }
>  
>  static void si_draw_vbo(struct pipe_context *ctx, const struct
> pipe_draw_info *info)
>  {
>       struct si_context *sctx = (struct si_context *)ctx;
>       struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
>       struct pipe_resource *indexbuf = info->index.resource;
> -     unsigned dirty_tex_counter;
> +     unsigned dirty_tex_counter, dirty_buf_counter;
>       enum pipe_prim_type rast_prim;
>       unsigned index_size = info->index_size;
>       unsigned index_offset = info->indirect ? info->start *
> index_size : 0;
>  
>       if (likely(!info->indirect)) {
>               /* SI-CI treat instance_count==0 as instance_count==1.
> There is
>                * no workaround for indirect draws, but we can at
> least skip
>                * direct draws.
>                */
>               if (unlikely(!info->instance_count))
> @@ -1285,20 +1285,27 @@ static void si_draw_vbo(struct pipe_context
> *ctx, const struct pipe_draw_info *i
>       dirty_tex_counter = p_atomic_read(&sctx->screen-
> >dirty_tex_counter);
>       if (unlikely(dirty_tex_counter != sctx-
> >last_dirty_tex_counter)) {
>               sctx->last_dirty_tex_counter = dirty_tex_counter;
>               sctx->framebuffer.dirty_cbufs |=
>                       ((1 << sctx->framebuffer.state.nr_cbufs) - 1);
>               sctx->framebuffer.dirty_zsbuf = true;
>               si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
>               si_update_all_texture_descriptors(sctx);
>       }
>  
> +     dirty_buf_counter = p_atomic_read(&sctx->screen-
> >dirty_buf_counter);
> +     if (unlikely(dirty_buf_counter != sctx-
> >last_dirty_buf_counter)) {
> +             sctx->last_dirty_buf_counter = dirty_buf_counter;
> +             /* Rebind all buffers unconditionally. */
> +             si_rebind_buffer(sctx, NULL);
> +     }
> +
>       si_decompress_textures(sctx, u_bit_consecutive(0,
> SI_NUM_GRAPHICS_SHADERS));
>  
>       /* Set the rasterization primitive type.
>        *
>        * This must be done after si_decompress_textures, which can
> call
>        * draw_vbo recursively, and before si_update_shaders, which
> uses
>        * current_rast_prim for this draw_vbo call. */
>       if (sctx->gs_shader.cso)
>               rast_prim = sctx->gs_shader.cso->gs_output_prim;
>       else if (sctx->tes_shader.cso) {

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to