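From: Dave Airlie <airl...@redhat.com> Look up the compute grid size user SGPR (AC_UD_CS_GRID_SIZE) once at compute pipeline creation and cache its register offset in pipeline->compute.cs_grid_size_sgpr, instead of looking it up on every dispatch. Same as we did for the draw dispatch and vertex sgprs. --- src/amd/vulkan/radv_cmd_buffer.c | 23 +++++++++-------------- src/amd/vulkan/radv_pipeline.c | 6 ++++++ src/amd/vulkan/radv_private.h | 4 ++++ 3 files changed, 19 insertions(+), 14 deletions(-)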
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index a069945..a4ddd7e 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2872,13 +2872,10 @@ void radv_CmdDispatch( MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10); - struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline, - MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE); - if (loc->sgpr_idx != -1) { - assert(!loc->indirect); + if (cmd_buffer->state.compute_pipeline->compute.cs_grid_size_sgpr) { uint8_t grid_used = cmd_buffer->state.compute_pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used; - assert(loc->num_sgprs == grid_used); - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, grid_used); + radeon_set_sh_reg_seq(cmd_buffer->cs, cmd_buffer->state.compute_pipeline->compute.cs_grid_size_sgpr, + grid_used); radeon_emit(cmd_buffer->cs, x); if (grid_used > 1) radeon_emit(cmd_buffer->cs, y); @@ -2912,9 +2909,9 @@ void radv_CmdDispatchIndirect( radv_flush_compute_state(cmd_buffer); MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 25); - struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline, - MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE); - if (loc->sgpr_idx != -1) { + + + if (cmd_buffer->state.compute_pipeline->compute.cs_grid_size_sgpr) { uint8_t grid_used = cmd_buffer->state.compute_pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used; for (unsigned i = 0; i < grid_used; ++i) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0)); @@ -2922,7 +2919,7 @@ void radv_CmdDispatchIndirect( COPY_DATA_DST_SEL(COPY_DATA_REG)); radeon_emit(cmd_buffer->cs, (va + 4 * i)); radeon_emit(cmd_buffer->cs, (va + 4 * i) >> 32); - radeon_emit(cmd_buffer->cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4) >> 2) + i); + 
radeon_emit(cmd_buffer->cs, (cmd_buffer->state.compute_pipeline->compute.cs_grid_size_sgpr >> 2) + i); radeon_emit(cmd_buffer->cs, 0); } } @@ -2984,11 +2981,9 @@ void radv_unaligned_dispatch( S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2]) | S_00B81C_NUM_THREAD_PARTIAL(remainder[2])); - struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline, - MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE); - if (loc->sgpr_idx != -1) { + if (cmd_buffer->state.compute_pipeline->compute.cs_grid_size_sgpr) { uint8_t grid_used = cmd_buffer->state.compute_pipeline->shaders[MESA_SHADER_COMPUTE]->info.info.cs.grid_components_used; - radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, grid_used); + radeon_set_sh_reg_seq(cmd_buffer->cs, cmd_buffer->state.compute_pipeline->compute.cs_grid_size_sgpr, grid_used); radeon_emit(cmd_buffer->cs, blocks[0]); if (grid_used > 1) radeon_emit(cmd_buffer->cs, blocks[1]); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index ccbe20d..bda4c74 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2375,6 +2375,12 @@ static VkResult radv_compute_pipeline_create( pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets; + + struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, + MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE); + if (loc->sgpr_idx != -1) { + pipeline->compute.cs_grid_size_sgpr = R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4; + } result = radv_pipeline_scratch_init(device, pipeline); if (result != VK_SUCCESS) { radv_pipeline_destroy(device, pipeline, pAllocator); diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 8f60d9b..29db05c 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1090,6 +1090,10 @@ struct radv_pipeline { bool tess_partial_vs_wave; bool partial_es_wave; } graphics; + 
+ struct { + uint32_t cs_grid_size_sgpr; + } compute; }; unsigned max_waves; -- 2.9.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev