On Thu, May 18, 2017 at 11:28 AM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > On 17.05.2017 21:38, Marek Olšák wrote: >> >> From: Marek Olšák <marek.ol...@amd.com> >> >> Constant buffers: slot[16], .. slot[31] (ascending) >> Shader buffers: slot[15], .. slot[0] (descending) >> >> The idea is that if we have 4 constant buffers and 2 shader buffers, we >> only >> have to upload 6 slots. That optimization is left for a later commit. >> --- >> src/gallium/drivers/radeonsi/si_debug.c | 44 ++++--- >> src/gallium/drivers/radeonsi/si_descriptors.c | 141 >> +++++++++++----------- >> src/gallium/drivers/radeonsi/si_pipe.h | 3 +- >> src/gallium/drivers/radeonsi/si_shader.c | 32 ++--- >> src/gallium/drivers/radeonsi/si_shader.h | 20 ++- >> src/gallium/drivers/radeonsi/si_shader_internal.h | 3 +- >> src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 13 +- >> src/gallium/drivers/radeonsi/si_state.h | 25 +++- >> 8 files changed, 150 insertions(+), 131 deletions(-) >> >> diff --git a/src/gallium/drivers/radeonsi/si_debug.c >> b/src/gallium/drivers/radeonsi/si_debug.c >> index d1159ad..25c3882 100644 >> --- a/src/gallium/drivers/radeonsi/si_debug.c >> +++ b/src/gallium/drivers/radeonsi/si_debug.c >> @@ -373,37 +373,38 @@ static void si_dump_framebuffer(struct si_context >> *sctx, FILE *f) >> } >> >> if (state->zsbuf) { >> rtex = (struct r600_texture*)state->zsbuf->texture; >> fprintf(f, COLOR_YELLOW "Depth-stencil buffer:" >> COLOR_RESET "\n"); >> r600_print_texture_info(sctx->b.screen, rtex, f); >> fprintf(f, "\n"); >> } >> } >> >> +typedef unsigned (*slot_remap_func)(unsigned); >> + >> static void si_dump_descriptor_list(struct si_descriptors *desc, >> const char *shader_name, >> const char *elem_name, >> unsigned num_elements, >> + slot_remap_func slot_remap, >> FILE *f) >> { >> unsigned i, j; >> - uint32_t *cpu_list = desc->list; >> - uint32_t *gpu_list = desc->gpu_list; >> - const char *list_note = "GPU list"; >> - >> - if (!gpu_list) { >> - gpu_list = cpu_list; >> - 
list_note = "CPU list"; >> - } >> >> for (i = 0; i < num_elements; i++) { >> + unsigned dw_offset = slot_remap(i) * >> desc->element_dw_size; >> + uint32_t *gpu_ptr = desc->gpu_list ? desc->gpu_list : >> desc->list; >> + const char *list_note = desc->gpu_list ? "GPU list" : "CPU >> list"; >> + uint32_t *cpu_list = desc->list + dw_offset; >> + uint32_t *gpu_list = gpu_ptr + dw_offset; >> + >> fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET >> "\n", >> shader_name, elem_name, i, list_note); >> >> switch (desc->element_dw_size) { >> case 4: >> for (j = 0; j < 4; j++) >> ac_dump_reg(f, R_008F00_SQ_BUF_RSRC_WORD0 >> + j*4, >> gpu_list[j], 0xffffffff); >> break; >> case 8: >> @@ -437,63 +438,75 @@ static void si_dump_descriptor_list(struct >> si_descriptors *desc, >> gpu_list[12+j], 0xffffffff); >> break; >> } >> >> if (memcmp(gpu_list, cpu_list, desc->element_dw_size * 4) >> != 0) { >> fprintf(f, COLOR_RED "!!!!! This slot was >> corrupted in GPU memory !!!!!" >> COLOR_RESET "\n"); >> } >> >> fprintf(f, "\n"); >> - gpu_list += desc->element_dw_size; >> - cpu_list += desc->element_dw_size; >> } >> } >> >> +static unsigned si_identity(unsigned slot) >> +{ >> + return slot; >> +} >> + >> static void si_dump_descriptors(struct si_context *sctx, >> enum pipe_shader_type processor, >> const struct tgsi_shader_info *info, FILE >> *f) >> { >> struct si_descriptors *descs = >> &sctx->descriptors[SI_DESCS_FIRST_SHADER + >> processor * SI_NUM_SHADER_DESCS]; >> static const char *shader_name[] = {"VS", "PS", "GS", "TCS", >> "TES", "CS"}; >> >> static const char *elem_name[] = { >> " - Constant buffer", >> " - Shader buffer", >> " - Sampler", >> " - Image", >> }; >> + static const slot_remap_func remap_func[] = { >> + si_get_constbuf_slot, >> + si_get_shaderbuf_slot, >> + si_identity, >> + si_identity, >> + }; >> unsigned enabled_slots[] = { >> - sctx->const_buffers[processor].enabled_mask, >> - sctx->shader_buffers[processor].enabled_mask, >> + 
sctx->const_and_shader_buffers[processor].enabled_mask >> >> SI_NUM_SHADER_BUFFERS, >> + >> util_bitreverse(sctx->const_and_shader_buffers[processor].enabled_mask & >> + u_bit_consecutive(0, >> SI_NUM_SHADER_BUFFERS)), >> sctx->samplers[processor].views.enabled_mask, >> sctx->images[processor].enabled_mask, >> }; >> unsigned required_slots[] = { >> info ? info->const_buffers_declared : 0, >> info ? info->shader_buffers_declared : 0, >> info ? info->samplers_declared : 0, >> info ? info->images_declared : 0, >> }; >> >> if (processor == PIPE_SHADER_VERTEX) { >> assert(info); /* only CS may not have an info struct */ >> >> si_dump_descriptor_list(&sctx->vertex_buffers, >> shader_name[processor], >> - " - Vertex buffer", >> info->num_inputs, f); >> + " - Vertex buffer", >> info->num_inputs, >> + si_identity, f); >> } >> >> for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs) >> si_dump_descriptor_list(descs, shader_name[processor], >> elem_name[i], >> - util_last_bit(enabled_slots[i] | >> required_slots[i]), f); >> + util_last_bit(enabled_slots[i] | >> required_slots[i]), >> + remap_func[i], f); >> } >> >> static void si_dump_gfx_descriptors(struct si_context *sctx, >> const struct si_shader_ctx_state >> *state, >> FILE *f) >> { >> if (!state->cso || !state->current) >> return; >> >> si_dump_descriptors(sctx, state->cso->type, &state->cso->info, f); >> @@ -798,21 +811,22 @@ static void si_dump_debug_state(struct pipe_context >> *ctx, FILE *f, >> si_dump_gfx_shader(sctx->screen, &sctx->ps_shader, f); >> si_dump_compute_shader(sctx->screen, >> &sctx->cs_shader_state, f); >> >> if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) { >> si_dump_annotated_shaders(sctx, f); >> si_dump_command("Active waves (raw data)", "umr >> -wa | column -t", f); >> si_dump_command("Wave information", "umr -O bits >> -wa", f); >> } >> >> >> si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS], >> - "", "RW buffers", >> SI_NUM_RW_BUFFERS, f); >> + "", "RW buffers", >> 
SI_NUM_RW_BUFFERS, >> + si_identity, f); >> si_dump_gfx_descriptors(sctx, &sctx->vs_shader, f); >> si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, f); >> si_dump_gfx_descriptors(sctx, &sctx->tes_shader, f); >> si_dump_gfx_descriptors(sctx, &sctx->gs_shader, f); >> si_dump_gfx_descriptors(sctx, &sctx->ps_shader, f); >> si_dump_compute_descriptors(sctx, f); >> } >> >> if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) { >> si_dump_bo_list(sctx, &sctx->last_gfx, f); >> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c >> b/src/gallium/drivers/radeonsi/si_descriptors.c >> index c92a657..5dc7068 100644 >> --- a/src/gallium/drivers/radeonsi/si_descriptors.c >> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c >> @@ -929,25 +929,29 @@ static void si_bind_sampler_states(struct >> pipe_context *ctx, >> } >> } >> >> /* BUFFER RESOURCES */ >> >> static void si_init_buffer_resources(struct si_buffer_resources *buffers, >> struct si_descriptors *descs, >> unsigned num_buffers, >> unsigned shader_userdata_index, >> enum radeon_bo_usage shader_usage, >> + enum radeon_bo_usage >> shader_usage_constbuf, >> enum radeon_bo_priority priority, >> + enum radeon_bo_priority >> priority_constbuf, >> unsigned *ce_offset) >> { >> buffers->shader_usage = shader_usage; >> + buffers->shader_usage_constbuf = shader_usage_constbuf; >> buffers->priority = priority; >> + buffers->priority_constbuf = priority_constbuf; >> buffers->buffers = CALLOC(num_buffers, sizeof(struct >> pipe_resource*)); >> >> si_init_descriptors(descs, shader_userdata_index, 4, >> num_buffers, NULL, ce_offset); >> } >> >> static void si_release_buffer_resources(struct si_buffer_resources >> *buffers, >> struct si_descriptors *descs) >> { >> int i; >> @@ -962,22 +966,25 @@ static void si_release_buffer_resources(struct >> si_buffer_resources *buffers, >> static void si_buffer_resources_begin_new_cs(struct si_context *sctx, >> struct si_buffer_resources >> *buffers) >> { >> unsigned mask = buffers->enabled_mask; >> >> /* 
Add buffers to the CS. */ >> while (mask) { >> int i = u_bit_scan(&mask); >> >> radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, >> - (struct >> r600_resource*)buffers->buffers[i], >> - buffers->shader_usage, >> buffers->priority); >> + r600_resource(buffers->buffers[i]), >> + i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage >> : >> + >> buffers->shader_usage_constbuf, >> + i < SI_NUM_SHADER_BUFFERS ? buffers->priority : >> + >> buffers->priority_constbuf); >> } >> } >> >> static void si_get_buffer_from_descriptors(struct si_buffer_resources >> *buffers, >> struct si_descriptors *descs, >> unsigned idx, struct >> pipe_resource **buf, >> unsigned *offset, unsigned >> *size) >> { >> pipe_resource_reference(buf, buffers->buffers[idx]); >> if (*buf) { >> @@ -1112,30 +1119,30 @@ bool si_upload_vertex_buffer_descriptors(struct >> si_context *sctx) >> si_mark_atom_dirty(sctx, &sctx->prefetch_L2); >> sctx->vertex_buffers_dirty = false; >> sctx->vertex_buffer_pointer_dirty = true; >> return true; >> } >> >> >> /* CONSTANT BUFFERS */ >> >> static unsigned >> -si_const_buffer_descriptors_idx(unsigned shader) >> +si_const_and_shader_buffer_descriptors_idx(unsigned shader) >> { >> return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS + >> - SI_SHADER_DESCS_CONST_BUFFERS; >> + SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS; >> } >> >> static struct si_descriptors * >> -si_const_buffer_descriptors(struct si_context *sctx, unsigned shader) >> +si_const_and_shader_buffer_descriptors(struct si_context *sctx, unsigned >> shader) >> { >> - return >> &sctx->descriptors[si_const_buffer_descriptors_idx(shader)]; >> + return >> &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)]; >> } >> >> void si_upload_const_buffer(struct si_context *sctx, struct r600_resource >> **rbuffer, >> const uint8_t *ptr, unsigned size, uint32_t >> *const_offset) >> { >> void *tmp; >> >> u_upload_alloc(sctx->b.b.const_uploader, 0, size, >> si_optimal_tcc_alignment(sctx, size), >> 
const_offset, >> @@ -1192,22 +1199,22 @@ static void si_set_constant_buffer(struct >> si_context *sctx, >> desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | >> S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | >> S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | >> S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | >> >> S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | >> >> S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); >> >> buffers->buffers[slot] = buffer; >> radeon_add_to_buffer_list_check_mem(&sctx->b, >> &sctx->b.gfx, >> (struct >> r600_resource*)buffer, >> - buffers->shader_usage, >> - buffers->priority, >> true); >> + >> buffers->shader_usage_constbuf, >> + >> buffers->priority_constbuf, true); >> buffers->enabled_mask |= 1u << slot; >> } else { >> /* Clear the descriptor. */ >> memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4); >> buffers->enabled_mask &= ~(1u << slot); >> } >> >> descs->dirty_mask |= 1u << slot; >> sctx->descriptors_dirty |= 1u << descriptors_idx; >> } >> @@ -1221,77 +1228,64 @@ void si_set_rw_buffer(struct si_context *sctx, >> >> static void si_pipe_set_constant_buffer(struct pipe_context *ctx, >> enum pipe_shader_type shader, uint >> slot, >> const struct pipe_constant_buffer >> *input) >> { >> struct si_context *sctx = (struct si_context *)ctx; >> >> if (shader >= SI_NUM_SHADERS) >> return; >> >> - si_set_constant_buffer(sctx, &sctx->const_buffers[shader], >> - si_const_buffer_descriptors_idx(shader), >> + slot = si_get_constbuf_slot(slot); >> + si_set_constant_buffer(sctx, >> &sctx->const_and_shader_buffers[shader], >> + >> si_const_and_shader_buffer_descriptors_idx(shader), >> slot, input); >> } >> >> void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, >> uint slot, struct pipe_constant_buffer >> *cbuf) >> { >> cbuf->user_buffer = NULL; >> si_get_buffer_from_descriptors( >> - &sctx->const_buffers[shader], >> - si_const_buffer_descriptors(sctx, shader), >> + &sctx->const_and_shader_buffers[shader], >> + 
si_const_and_shader_buffer_descriptors(sctx, shader), >> slot, &cbuf->buffer, &cbuf->buffer_offset, >> &cbuf->buffer_size); > > > slot needs to be remapped here as well, doesn't it?
Yes, si_get_pipe_constant_buffer needs the same remapping; I just need to pass si_get_constbuf_slot(slot) there instead of the raw slot. > > With that fixed, the patch is: > > Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> Thanks. Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev