and add radeonsi support.

This will be used by radeonsi internally.

Signed-off-by: Sonny Jiang <sonny.ji...@amd.com>
---
 src/gallium/drivers/radeonsi/si_compute.c | 33 +++++++++++++++++++----
 src/gallium/include/pipe/p_state.h        |  7 +++++
 2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index cbcd8e79c7b..69ffad45cd9 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -797,11 +797,6 @@ static void si_emit_dispatch_packets(struct si_context *sctx,
 	radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
 			  compute_resource_limits);
 
-	radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
-	radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0]));
-	radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1]));
-	radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2]));
-
 	unsigned dispatch_initiator =
 		S_00B800_COMPUTE_SHADER_EN(1) |
 		S_00B800_FORCE_START_AT_000(1) |
@@ -809,6 +804,34 @@ static void si_emit_dispatch_packets(struct si_context *sctx,
 		 * allow launching waves out-of-order. (same as Vulkan) */
 		S_00B800_ORDER_MODE(sctx->chip_class >= CIK);
 
+	bool partial_block_en = info->partial_block[0] ||
+				info->partial_block[1] ||
+				info->partial_block[2];
+
+	radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
+
+	if (partial_block_en) {
+		unsigned partial[3];
+
+		/* If no partial_block, these should be an entire block size, not 0. */
+		partial[0] = info->partial_block[0] ? info->partial_block[0] : info->block[0];
+		partial[1] = info->partial_block[1] ? info->partial_block[1] : info->block[1];
+		partial[2] = info->partial_block[2] ? info->partial_block[2] : info->block[2];
+
+		radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0]) |
+				S_00B81C_NUM_THREAD_PARTIAL(partial[0]));
+		radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1]) |
+				S_00B820_NUM_THREAD_PARTIAL(partial[1]));
+		radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2]) |
+				S_00B824_NUM_THREAD_PARTIAL(partial[2]));
+
+		dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1);
+	} else {
+		radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0]));
+		radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1]));
+		radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2]));
+	}
+
 	if (info->indirect) {
 		uint64_t base_va = r600_resource(info->indirect)->gpu_address;
 
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 38052e5fd3d..56f5bdd4c85 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -838,6 +838,13 @@ struct pipe_grid_info
     */
    uint block[3];
 
+   /**
+    * Number of threads to add to the grid in X, Y, and Z directions for
+    * compute dispatches that are not aligned to the block size.
+    * The added threads will be launched as partial thread blocks.
+    */
+   uint partial_block[3];
+
    /**
     * Determine the layout of the grid (in block units) to be used.
     */
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev