Not all drivers care when cs.reg_*_mem change. (ir3 only cares about req_input_mem and removing that dependency should be easy.) Add some caps to let clover make better decisions about when it needs to re- create the compute-state CSO.
This way, if the kernel is compiled early for clGetKernelWorkGroupInfo() it doesn't end up getting compiled a second time when the kernel is launched for the first time (clEnqueueNDRangeKernel(), etc). Signed-off-by: Rob Clark <robdcl...@gmail.com> --- If we pre-compile the kernel then we pretty much end up compiling it at least twice, since we don't know the size of the input/local mem yet. But if driver doesn't care about these, that is a bit silly. Maybe a bit pre-mature optimization, but figured I'd see what others think of the idea. src/gallium/drivers/freedreno/a5xx/fd5_compute.c | 3 +++ src/gallium/include/pipe/p_defines.h | 5 +++++ src/gallium/state_trackers/clover/core/device.cpp | 7 +++++++ src/gallium/state_trackers/clover/core/device.hpp | 7 +++++++ src/gallium/state_trackers/clover/core/kernel.cpp | 4 ++-- 5 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_compute.c b/src/gallium/drivers/freedreno/a5xx/fd5_compute.c index 52b60e0c5e2..85efe7ca120 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_compute.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_compute.c @@ -137,6 +137,9 @@ fd5_get_compute_param(struct fd_screen *screen, enum pipe_compute_cap param, // RET((uint32_t []){ 64 }); RET((uint32_t []){ 32 }); + case PIPE_COMPUTE_CAP_SHADER_DEPS: + RET((uint32_t []){ PIPE_SHADER_DEP_INPUT_MEM }); + case PIPE_COMPUTE_CAP_IR_TARGET: if (ret) sprintf(ret, ir); diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 0fa96c0d412..f890f99bf01 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -897,6 +897,10 @@ enum pipe_shader_ir PIPE_SHADER_IR_SPIRV }; +#define PIPE_SHADER_DEP_LOCAL_MEM 0x1 /* recompile if req_local_mem changes */ +#define PIPE_SHADER_DEP_PRIVATE_MEM 0x2 /* recompile if req_private_mem changes */ +#define PIPE_SHADER_DEP_INPUT_MEM 0x4 /* recompile if req_input_mem changes */ + /** * Compute-specific implementation capability. They can be queried * using pipe_screen::get_compute_param or pipe_screen::get_kernel_param. @@ -919,6 +923,7 @@ enum pipe_compute_cap PIPE_COMPUTE_CAP_IMAGES_SUPPORTED, PIPE_COMPUTE_CAP_SUBGROUP_SIZE, PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK, + PIPE_COMPUTE_CAP_SHADER_DEPS, /* bitmask of PIPE_SHADER_DEP_x */ }; /** diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index 97e098f65de..e7037afa354 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -51,6 +51,13 @@ device::device(clover::platform &platform, pipe_loader_device *ldev) : throw error(CL_INVALID_DEVICE); } + uint32_t shader_deps = + get_compute_param<uint32_t>(pipe, ir_format(), + PIPE_COMPUTE_CAP_SHADER_DEPS)[0]; + dep_local_mem = !!(shader_deps & PIPE_SHADER_DEP_LOCAL_MEM); + dep_private_mem = !!(shader_deps & PIPE_SHADER_DEP_PRIVATE_MEM); + dep_input_mem = !!(shader_deps & PIPE_SHADER_DEP_INPUT_MEM); + uint32_t shareable_shaders = pipe->get_param(pipe, PIPE_CAP_SHAREABLE_SHADERS); diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp index 63cf3abccc4..8de38201777 100644 --- a/src/gallium/state_trackers/clover/core/device.hpp +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -99,6 +99,13 @@ namespace clover { */ pipe_context *pctx; + /* things that the compute-state CSO depends on, which determines + * what triggers recreating the CSO. + */ + bool dep_local_mem; + bool dep_private_mem; + bool dep_input_mem; + private: pipe_loader_device *ldev; }; diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp index 424e44f4ab4..80861e06df1 100644 --- a/src/gallium/state_trackers/clover/core/kernel.cpp +++ b/src/gallium/state_trackers/clover/core/kernel.cpp @@ -287,10 +287,10 @@ kernel::exec_context::bind_st(const device &_d, bool force) { if (!pctx) return NULL; - if (cs.req_input_mem != input.size()) + if (_d.dep_input_mem && (cs.req_input_mem != input.size())) needs_rebuild = true; - if (cs.req_local_mem != mem_local) + if (_d.dep_local_mem && (cs.req_local_mem != mem_local)) needs_rebuild = true; // Create a new compute state if anything changed. -- 2.14.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev