From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_compute.c | 8 ++++++++ src/gallium/drivers/radeonsi/si_pipe.h | 1 + 2 files changed, 9 insertions(+)
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index c5d3d5fcf02..e0c6902fec4 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -777,20 +777,28 @@ static void si_emit_dispatch_packets(struct si_context *sctx, if (sctx->chip_class >= CIK) { unsigned num_cu_per_se = sscreen->info.num_good_compute_units / sscreen->info.max_se; /* Force even distribution on all SIMDs in CU if the workgroup * size is 64. This has shown some good improvements if # of CUs * per SE is not a multiple of 4. */ if (num_cu_per_se % 4 && waves_per_threadgroup == 1) compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1); + + compute_resource_limits |= S_00B854_WAVES_PER_SH(sctx->cs_max_waves_per_sh); + } else { + /* SI */ + if (sctx->cs_max_waves_per_sh) { + unsigned limit_div16 = DIV_ROUND_UP(sctx->cs_max_waves_per_sh, 16); + compute_resource_limits |= S_00B854_WAVES_PER_SH_SI(limit_div16); + } } radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, compute_resource_limits); radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0])); radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1])); radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2])); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 100d0166f62..fe06064b388 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -818,20 +818,21 @@ struct si_context { struct si_shader_ctx_state ps_shader; struct si_shader_ctx_state gs_shader; struct si_shader_ctx_state vs_shader; struct si_shader_ctx_state tcs_shader; struct si_shader_ctx_state tes_shader; struct si_cs_shader_state cs_shader_state; /* shader information */ struct si_vertex_elements *vertex_elements; unsigned sprite_coord_enable; + unsigned cs_max_waves_per_sh; bool flatshade; bool do_update_shaders; /* vertex buffer descriptors */ uint32_t *vb_descriptors_gpu_list; struct r600_resource *vb_descriptors_buffer; unsigned vb_descriptors_offset; /* shader descriptors */ struct si_descriptors descriptors[SI_NUM_DESCS]; -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev