v2: Use chip_class instead of family. v3: On SI, check the kernel DRM version (radeon 2.45 or newer) before advertising TGSI compute support.
Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> --- docs/GL3.txt | 4 ++-- docs/relnotes/11.3.0.html | 1 + src/gallium/drivers/radeon/r600_pipe_common.c | 21 ++++++++++++++++----- src/gallium/drivers/radeonsi/si_pipe.c | 15 +++++++++++++-- 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 3febd6e..6214f8d 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -167,7 +167,7 @@ GL 4.3, GLSL 4.30: GL_ARB_arrays_of_arrays DONE (all drivers that support GLSL 1.30) GL_ARB_ES3_compatibility DONE (all drivers that support GLSL 3.30) GL_ARB_clear_buffer_object DONE (all drivers) - GL_ARB_compute_shader DONE (i965) + GL_ARB_compute_shader DONE (i965, radeonsi) GL_ARB_copy_image DONE (i965, nv50, nvc0, r600, radeonsi) GL_KHR_debug DONE (all drivers) GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL) @@ -225,7 +225,7 @@ GL 4.5, GLSL 4.50: These are the extensions cherry-picked to make GLES 3.1 GLES3.1, GLSL ES 3.1 GL_ARB_arrays_of_arrays DONE (all drivers that support GLSL 1.30) - GL_ARB_compute_shader DONE (i965) + GL_ARB_compute_shader DONE (i965, radeonsi) GL_ARB_draw_indirect DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL) GL_ARB_framebuffer_no_attachments DONE (i965, nvc0, r600, radeonsi, softpipe) diff --git a/docs/relnotes/11.3.0.html b/docs/relnotes/11.3.0.html index 0f9aed8..5a7083c 100644 --- a/docs/relnotes/11.3.0.html +++ b/docs/relnotes/11.3.0.html @@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers. 
<ul> <li>OpenGL 4.2 on radeonsi</li> +<li>GL_ARB_compute_shader on radeonsi</li> <li>GL_ARB_framebuffer_no_attachments on nvc0, r600, radeonsi, softpipe</li> <li>GL_ARB_internalformat_query2 on all drivers</li> <li>GL_ARB_robust_buffer_access_behavior on radeonsi</li> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index a7477ab..64da62f 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -645,23 +645,34 @@ static int r600_get_compute_param(struct pipe_screen *screen, uint64_t *grid_size = ret; grid_size[0] = 65535; grid_size[1] = 65535; - grid_size[2] = 1; + grid_size[2] = 65535; } return 3 * sizeof(uint64_t) ; case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: if (ret) { uint64_t *block_size = ret; - block_size[0] = 256; - block_size[1] = 256; - block_size[2] = 256; + if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 && + ir_type == PIPE_SHADER_IR_TGSI) { + block_size[0] = 2048; + block_size[1] = 2048; + block_size[2] = 2048; + } else { + block_size[0] = 256; + block_size[1] = 256; + block_size[2] = 256; + } } return 3 * sizeof(uint64_t); case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: if (ret) { uint64_t *max_threads_per_block = ret; - *max_threads_per_block = 256; + if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 && + ir_type == PIPE_SHADER_IR_TGSI) + *max_threads_per_block = 2048; + else + *max_threads_per_block = 256; } return sizeof(uint64_t); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index f22cd03..7501a8f 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -447,6 +447,8 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enum pipe_shader_cap param) { + struct si_screen *sscreen = (struct si_screen *)pscreen; + switch(shader) { case 
PIPE_SHADER_FRAGMENT: @@ -464,9 +466,18 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_NATIVE; - case PIPE_SHADER_CAP_SUPPORTED_IRS: - return 0; + case PIPE_SHADER_CAP_SUPPORTED_IRS: { + int ir = 1 << PIPE_SHADER_IR_NATIVE; + /* Old kernels disallowed shader register writes using + * COPY_DATA packets that are used for indirect dispatches. */ + if (HAVE_LLVM >= 0x309 && (sscreen->b.chip_class >= CIK || + (sscreen->b.info.drm_major == 2 && + sscreen->b.info.drm_minor >= 45))) + ir |= 1 << PIPE_SHADER_IR_TGSI; + + return ir; + } case PIPE_SHADER_CAP_DOUBLES: return HAVE_LLVM >= 0x0307; -- 2.8.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev