For radeonsi, native and TGSI use different compilers and this results in different limits for different IRs.
The set we strictly need for radeonsi is only the MAX_BLOCK_SIZE and MAX_THREADS_PER_BLOCK params, but I added a few others as shader related that seemed like they would also typically depend on the compiler. Radeonsi needs these params because we need to restrict the number of used registers for blocks of > 256 threads; we do not know the block size in advance for clover and cannot use shader variants because clover only provides native code. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> --- src/gallium/docs/source/screen.rst | 18 ++++++------- src/gallium/drivers/ilo/ilo_screen.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 +- src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 +- src/gallium/drivers/r600/r600_pipe.c | 2 +- src/gallium/drivers/radeon/r600_pipe_common.c | 3 ++- src/gallium/drivers/radeonsi/si_pipe.c | 2 +- src/gallium/drivers/trace/tr_screen.c | 5 ++-- src/gallium/include/pipe/p_screen.h | 13 ++++++---- src/gallium/state_trackers/clover/core/device.cpp | 31 +++++++++++++---------- src/gallium/tests/trivial/compute.c | 4 ++- src/mesa/state_tracker/st_extensions.c | 13 +++++----- 12 files changed, 54 insertions(+), 42 deletions(-) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 46ec381..52e07d2 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -436,26 +436,26 @@ pipe_screen::get_compute_param. ``processor-arch-manufacturer-os`` that will be passed on to the compiler. This CAP is only relevant for drivers that specify PIPE_SHADER_IR_LLVM or PIPE_SHADER_IR_NATIVE for their preferred IR. - Value type: null-terminated string. + Value type: null-terminated string. Shader related. * ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions - for grid and block coordinates. Value type: ``uint64_t``. + for grid and block coordinates. Value type: ``uint64_t``. Shader related. * ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block - units. 
Value type: ``uint64_t []``. + units. Value type: ``uint64_t []``. Shader related. * ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread - units. Value type: ``uint64_t []``. + units. Value type: ``uint64_t []``. Shader related. * ``PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK``: Maximum number of threads that - a single block can contain. Value type: ``uint64_t``. + a single block can contain. Value type: ``uint64_t``. Shader related. This may be less than the product of the components of MAX_BLOCK_SIZE and is usually limited by the number of threads that can be resident simultaneously on a compute unit. * ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL - resource. Value type: ``uint64_t``. + resource. Value type: ``uint64_t``. Shader related. * ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL - resource. Value type: ``uint64_t``. + resource. Value type: ``uint64_t``. Shader related. * ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE - resource. Value type: ``uint64_t``. + resource. Value type: ``uint64_t``. Shader related. * ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT - resource. Value type: ``uint64_t``. + resource. Value type: ``uint64_t``. Shader related. * ``PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE``: Maximum size of a memory object allocation in bytes. Value type: ``uint64_t``. 
* ``PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY``: Maximum frequency of the GPU diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 548d215..c5b5ab4 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -179,6 +179,7 @@ ilo_get_video_param(struct pipe_screen *screen, static int ilo_get_compute_param(struct pipe_screen *screen, + unsigned ir_type, enum pipe_compute_cap param, void *ret) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 5836bb2..5c902d4 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -358,7 +358,7 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) } static int -nv50_screen_get_compute_param(struct pipe_screen *pscreen, +nv50_screen_get_compute_param(struct pipe_screen *pscreen, unsigned ir_type, enum pipe_compute_cap param, void *data) { struct nv50_screen *screen = nv50_screen(pscreen); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 553c001..b821a15 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -386,7 +386,7 @@ nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) } static int -nvc0_screen_get_compute_param(struct pipe_screen *pscreen, +nvc0_screen_get_compute_param(struct pipe_screen *pscreen, unsigned ir_type, enum pipe_compute_cap param, void *data) { struct nvc0_screen *screen = nvc0_screen(pscreen); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index b801191..a7d1af2 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -499,7 +499,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: if (shader 
== PIPE_SHADER_COMPUTE) { uint64_t max_const_buffer_size; - pscreen->get_compute_param(pscreen, + pscreen->get_compute_param(pscreen, 0, PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, &max_const_buffer_size); return max_const_buffer_size; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index eed9d83..015d575 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -603,6 +603,7 @@ const char *r600_get_llvm_processor_name(enum radeon_family family) } static int r600_get_compute_param(struct pipe_screen *screen, + unsigned ir_type, enum pipe_compute_cap param, void *ret) { @@ -669,7 +670,7 @@ static int r600_get_compute_param(struct pipe_screen *screen, uint64_t *max_global_size = ret; uint64_t max_mem_alloc_size; - r600_get_compute_param(screen, + r600_get_compute_param(screen, 0, PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, &max_mem_alloc_size); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index dd1103e..331b308 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -467,7 +467,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: { uint64_t max_const_buffer_size; - pscreen->get_compute_param(pscreen, + pscreen->get_compute_param(pscreen, 0, PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, &max_const_buffer_size); return max_const_buffer_size; diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index b24e185..e19121d 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -174,7 +174,7 @@ trace_screen_get_paramf(struct pipe_screen *_screen, static int -trace_screen_get_compute_param(struct pipe_screen *_screen, +trace_screen_get_compute_param(struct pipe_screen *_screen, unsigned ir_type, enum pipe_compute_cap param, void *data) { 
struct trace_screen *tr_scr = trace_screen(_screen); @@ -184,10 +184,11 @@ trace_screen_get_compute_param(struct pipe_screen *_screen, trace_dump_call_begin("pipe_screen", "get_compute_param"); trace_dump_arg(ptr, screen); + trace_dump_arg(int, ir_type); trace_dump_arg(int, param); trace_dump_arg(ptr, data); - result = screen->get_compute_param(screen, param, data); + result = screen->get_compute_param(screen, ir_type, param, data); trace_dump_ret(int, result); diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 4f30e75..6d44cf7 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -109,13 +109,16 @@ struct pipe_screen { /** * Query a compute-specific capability/parameter/limit. - * \param param one of PIPE_COMPUTE_CAP_x - * \param ret pointer to a preallocated buffer that will be - * initialized to the parameter value, or NULL. - * \return size in bytes of the parameter value that would be - * returned. + * \param ir_type shader IR type for which the param applies, or don't care + * if the param is not shader related + * \param param one of PIPE_COMPUTE_CAP_x + * \param ret pointer to a preallocated buffer that will be + * initialized to the parameter value, or NULL. + * \return size in bytes of the parameter value that would be + * returned. 
*/ int (*get_compute_param)(struct pipe_screen *, + unsigned ir_type, enum pipe_compute_cap param, void *ret); diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index 1be2f64..97d1d92 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -30,11 +30,12 @@ using namespace clover; namespace { template<typename T> std::vector<T> - get_compute_param(pipe_screen *pipe, pipe_compute_cap cap) { - int sz = pipe->get_compute_param(pipe, cap, NULL); + get_compute_param(pipe_screen *pipe, unsigned ir_format, + pipe_compute_cap cap) { + int sz = pipe->get_compute_param(pipe, ir_format, cap, NULL); std::vector<T> v(sz / sizeof(T)); - pipe->get_compute_param(pipe, cap, &v.front()); + pipe->get_compute_param(pipe, ir_format, cap, &v.front()); return v; } } @@ -115,19 +116,19 @@ device::max_samplers() const { cl_ulong device::max_mem_global() const { - return get_compute_param<uint64_t>(pipe, + return get_compute_param<uint64_t>(pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0]; } cl_ulong device::max_mem_local() const { - return get_compute_param<uint64_t>(pipe, + return get_compute_param<uint64_t>(pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0]; } cl_ulong device::max_mem_input() const { - return get_compute_param<uint64_t>(pipe, + return get_compute_param<uint64_t>(pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0]; } @@ -146,30 +147,30 @@ device::max_const_buffers() const { size_t device::max_threads_per_block() const { return get_compute_param<uint64_t>( - pipe, PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0]; + pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0]; } cl_ulong device::max_mem_alloc_size() const { - return get_compute_param<uint64_t>(pipe, + return get_compute_param<uint64_t>(pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE)[0]; } cl_uint device::max_clock_frequency() const { - return 
get_compute_param<uint32_t>(pipe, + return get_compute_param<uint32_t>(pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0]; } cl_uint device::max_compute_units() const { - return get_compute_param<uint32_t>(pipe, + return get_compute_param<uint32_t>(pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0]; } bool device::image_support() const { - return get_compute_param<uint32_t>(pipe, + return get_compute_param<uint32_t>(pipe, ir_format(), PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0]; } @@ -181,13 +182,15 @@ device::has_doubles() const { std::vector<size_t> device::max_block_size() const { - auto v = get_compute_param<uint64_t>(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); + auto v = get_compute_param<uint64_t>(pipe, ir_format(), + PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); return { v.begin(), v.end() }; } cl_uint device::subgroup_size() const { - return get_compute_param<uint32_t>(pipe, PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0]; + return get_compute_param<uint32_t>(pipe, ir_format(), + PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0]; } std::string @@ -209,7 +212,7 @@ device::ir_format() const { std::string device::ir_target() const { std::vector<char> target = get_compute_param<char>( - pipe, PIPE_COMPUTE_CAP_IR_TARGET); + pipe, ir_format(), PIPE_COMPUTE_CAP_IR_TARGET); return { target.data() }; } diff --git a/src/gallium/tests/trivial/compute.c b/src/gallium/tests/trivial/compute.c index af3e3aa..c94e2e1 100644 --- a/src/gallium/tests/trivial/compute.c +++ b/src/gallium/tests/trivial/compute.c @@ -58,7 +58,9 @@ struct context { uint64_t __v[4]; \ int __i, __n; \ \ - __n = ctx->screen->get_compute_param(ctx->screen, c, __v); \ + __n = ctx->screen->get_compute_param(ctx->screen, \ + PIPE_SHADER_IR_TGSI, \ + c, __v); \ printf("%s: {", #c); \ \ for (__i = 0; __i < __n / sizeof(*__v); ++__i) \ diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 2fdaba0..e56cc23 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ 
b/src/mesa/state_tracker/st_extensions.c @@ -1105,14 +1105,15 @@ void st_init_extensions(struct pipe_screen *screen, if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) { uint64_t grid_size[3], block_size[3]; - screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GRID_SIZE, - grid_size); - screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, - block_size); - screen->get_compute_param(screen, + screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + PIPE_COMPUTE_CAP_MAX_GRID_SIZE, grid_size); + screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, block_size); + screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK, &consts->MaxComputeWorkGroupInvocations); - screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE, + screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE, &consts->MaxComputeSharedMemorySize); for (i = 0; i < 3; i++) { -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev