From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_descriptors.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 02f921e..ea5b89e 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -89,20 +89,25 @@ static uint32_t null_texture_descriptor[8] = {
 
 static uint32_t null_image_descriptor[8] = {
 	0,
 	0,
 	0,
 	S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
 	/* the rest must contain zeros, which is also used by the buffer
 	 * descriptor */
 };
 
+static uint16_t si_ce_ram_size(struct si_context *sctx)
+{
+	return sctx->b.chip_class >= GFX9 ? 4096 : 32768;
+}
+
 static void si_init_descriptor_list(uint32_t *desc_list,
 				    unsigned element_dw_size,
 				    unsigned num_elements,
 				    const uint32_t *null_descriptor)
 {
 	int i;
 
 	/* Initialize the array to NULL descriptors if the element size is 8. */
 	if (null_descriptor) {
 		assert(element_dw_size % 8 == 0);
@@ -141,25 +146,32 @@ static void si_init_descriptors(struct si_context *sctx,
 static void si_release_descriptors(struct si_descriptors *desc)
 {
 	r600_resource_reference(&desc->buffer, NULL);
 	FREE(desc->list);
 }
 
 static bool si_ce_upload(struct si_context *sctx, unsigned ce_offset, unsigned size,
 			 unsigned *out_offset, struct r600_resource **out_buf)
 {
 	uint64_t va;
+	unsigned cache_line_size = sctx->screen->b.info.tcc_cache_line_size;
 
-	u_suballocator_alloc(sctx->ce_suballocator, size,
-			     si_optimal_tcc_alignment(sctx, size),
-			     out_offset,
-			     (struct pipe_resource**)out_buf);
+	/* The base and size should be aligned to the L2 cache line size
+	 * for optimal performance. (all dumps should rewrite whole lines)
+	 */
+	size = align(size, cache_line_size);
+
+	(void)si_ce_ram_size; /* silence an "unused" warning */
+	assert(offset + size <= si_ce_ram_size(sctx));
+
+	u_suballocator_alloc(sctx->ce_suballocator, size, cache_line_size,
+			     out_offset, (struct pipe_resource**)out_buf);
 	if (!out_buf)
 			return false;
 
 	va = (*out_buf)->gpu_address + *out_offset;
 
 	radeon_emit(sctx->ce_ib, PKT3(PKT3_DUMP_CONST_RAM, 3, 0));
 	radeon_emit(sctx->ce_ib, ce_offset);
 	radeon_emit(sctx->ce_ib, size / 4);
 	radeon_emit(sctx->ce_ib, va);
 	radeon_emit(sctx->ce_ib, va >> 32);
@@ -2845,24 +2857,21 @@ void si_init_all_descriptors(struct si_context *sctx)
 	sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots = SI_NUM_RW_BUFFERS;
 
 	si_init_descriptors(sctx, &sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
 			    4, SI_NUM_VERTEX_BUFFERS, 0, 0, NULL);
 	FREE(sctx->vertex_buffers.list); /* not used */
 	sctx->vertex_buffers.list = NULL;
 
 	sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
 	sctx->total_ce_ram_allocated = ce_offset;
 
-	if (sctx->b.chip_class >= GFX9)
-		assert(ce_offset <= 4096);
-	else
-		assert(ce_offset <= 32768);
+	assert(ce_offset <= si_ce_ram_size(sctx));
 
 	/* Set pipe_context functions. */
 	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
 	sctx->b.b.set_shader_images = si_set_shader_images;
 	sctx->b.b.set_constant_buffer = si_pipe_set_constant_buffer;
 	sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
 	sctx->b.b.set_shader_buffers = si_set_shader_buffers;
 	sctx->b.b.set_sampler_views = si_set_sampler_views;
 	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
 	sctx->b.b.create_texture_handle = si_create_texture_handle;
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev