Some nice improvements in there. One minor stylistic remark on patch #8; apart from that, patches 2-11 are

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

On 18.01.2017 03:11, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

v2: now it should be correct
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 8 ++++----
 src/gallium/drivers/radeonsi/si_state.c       | 6 ++++++
 src/gallium/drivers/radeonsi/si_state.h       | 1 +
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 837f393..391c58b 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -932,29 +932,29 @@ static void si_vertex_buffers_begin_new_cs(struct 
si_context *sctx)
                return;
        radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
                              desc->buffer, RADEON_USAGE_READ,
                              RADEON_PRIO_DESCRIPTORS);
 }

 bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 {
        struct si_vertex_element *velems = sctx->vertex_elements;
        struct si_descriptors *desc = &sctx->vertex_buffers;
-       bool bound[SI_NUM_VERTEX_BUFFERS] = {};
        unsigned i, count = velems->count;
        uint64_t va;
        uint32_t *ptr;

        if (!sctx->vertex_buffers_dirty || !count || !velems)
                return true;

        unsigned fix_size3 = velems->fix_size3;
+       unsigned first_vb_use_mask = velems->first_vb_use_mask;

        /* Vertex buffer descriptors are the only ones which are uploaded
         * directly through a staging buffer and don't go through
         * the fine-grained upload path.
         */
        u_upload_alloc(sctx->b.uploader, 0, count * 16, 256, 
&desc->buffer_offset,
                       (struct pipe_resource**)&desc->buffer, (void**)&ptr);
        if (!desc->buffer)
                return false;

@@ -962,23 +962,24 @@ bool si_upload_vertex_buffer_descriptors(struct 
si_context *sctx)
                              desc->buffer, RADEON_USAGE_READ,
                              RADEON_PRIO_DESCRIPTORS);

        assert(count <= SI_NUM_VERTEX_BUFFERS);

        for (i = 0; i < count; i++) {
                struct pipe_vertex_element *ve = &velems->elements[i];
                struct pipe_vertex_buffer *vb;
                struct r600_resource *rbuffer;
                unsigned offset;
+               unsigned vbo_index = ve->vertex_buffer_index;
                uint32_t *desc = &ptr[i*4];

-               vb = &sctx->vertex_buffer[ve->vertex_buffer_index];
+               vb = &sctx->vertex_buffer[vbo_index];
                rbuffer = (struct r600_resource*)vb->buffer;
                if (!rbuffer) {
                        memset(desc, 0, 16);
                        continue;
                }

                offset = vb->buffer_offset + ve->src_offset;
                va = rbuffer->gpu_address + offset;

                /* Fill in T# buffer resource description */
@@ -1011,25 +1012,24 @@ bool si_upload_vertex_buffer_descriptors(struct 
si_context *sctx)
                        size3 = (fix_size3 >> (2 * i)) & 3;
                        if (vb->stride && size3) {
                                assert(offset % 4 == 0 && vb->stride % 4 == 0);
                                assert(size3 <= 2);
                                desc[2] = align(desc[2], size3 * 2);
                        }
                }

                desc[3] = velems->rsrc_word3[i];

-               if (!bound[ve->vertex_buffer_index]) {
+               if (first_vb_use_mask & (1 << i)) {
                        radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
                                              (struct r600_resource*)vb->buffer,
                                              RADEON_USAGE_READ, 
RADEON_PRIO_VERTEX_BUFFER);
-                       bound[ve->vertex_buffer_index] = true;
                }
        }

        /* Don't flush the const cache. It would have a very negative effect
         * on performance (confirmed by testing). New descriptors are always
         * uploaded to a fresh new buffer, so I don't think flushing the const
         * cache is needed. */
        si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
        sctx->vertex_buffers_dirty = false;
        sctx->vertex_buffer_pointer_dirty = true;
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 3022260..f60a499 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3316,39 +3316,45 @@ static void si_delete_sampler_state(struct pipe_context 
*ctx, void *state)

 /*
  * Vertex elements & buffers
  */

 static void *si_create_vertex_elements(struct pipe_context *ctx,
                                       unsigned count,
                                       const struct pipe_vertex_element 
*elements)
 {
        struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
+       bool used[SI_NUM_VERTEX_BUFFERS] = {};
        int i;

        assert(count <= SI_MAX_ATTRIBS);
        if (!v)
                return NULL;

        v->count = count;
        for (i = 0; i < count; ++i) {
                const struct util_format_description *desc;
                const struct util_format_channel_description *channel;
                unsigned data_format, num_format;
                int first_non_void;
                unsigned vbo_index = elements[i].vertex_buffer_index;

                if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
                        FREE(v);
                        return NULL;
                }

+               if (!used[vbo_index]) {
+                       v->first_vb_use_mask |= 1 << i;
+                       used[vbo_index] = true;
+               }
+
                desc = util_format_description(elements[i].src_format);
                first_non_void = 
util_format_get_first_non_void_channel(elements[i].src_format);
                data_format = si_translate_buffer_dataformat(ctx->screen, desc, 
first_non_void);
                num_format = si_translate_buffer_numformat(ctx->screen, desc, 
first_non_void);
                channel = &desc->channel[first_non_void];

                v->rsrc_word3[i] = 
S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
                                   
S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
                                   
S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
                                   
S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index 34a0f57..03e5011 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -92,20 +92,21 @@ struct si_state_dsa {

 struct si_stencil_ref {
        struct r600_atom                atom;
        struct pipe_stencil_ref         state;
        struct si_dsa_stencil_ref_part  dsa_part;
 };

 struct si_vertex_element
 {
        unsigned                        count;
+       unsigned                        first_vb_use_mask;

        /* Two bits per attribute indicating the size of each vector component
         * in bytes if the size 3-workaround must be applied.
         */
        uint32_t                        fix_size3;
        uint64_t                        fix_fetch;

        uint32_t                        rsrc_word3[SI_MAX_ATTRIBS];
        uint32_t                        format_size[SI_MAX_ATTRIBS];
        struct pipe_vertex_element      elements[SI_MAX_ATTRIBS];

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to