From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_pipe.c | 31 +++++++++++++++++++++---- src/gallium/drivers/radeonsi/si_pipe.h | 3 +++ src/gallium/drivers/radeonsi/si_state_shaders.c | 8 +++---- 3 files changed, 34 insertions(+), 8 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 8bf6fd9..082ba99 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -734,25 +734,30 @@ static void si_destroy_screen(struct pipe_screen* pscreen) sscreen->gs_prologs, sscreen->ps_prologs, sscreen->ps_epilogs }; unsigned i; if (!sscreen->b.ws->unref(sscreen->b.ws)) return; util_queue_destroy(&sscreen->shader_compiler_queue); + util_queue_destroy(&sscreen->shader_compiler_queue_low_priority); for (i = 0; i < ARRAY_SIZE(sscreen->tm); i++) if (sscreen->tm[i]) LLVMDisposeTargetMachine(sscreen->tm[i]); + for (i = 0; i < ARRAY_SIZE(sscreen->tm_low_priority); i++) + if (sscreen->tm_low_priority[i]) + LLVMDisposeTargetMachine(sscreen->tm_low_priority[i]); + /* Free shader parts. */ for (i = 0; i < ARRAY_SIZE(parts); i++) { while (parts[i]) { struct si_shader_part *part = parts[i]; parts[i] = part->next; radeon_shader_binary_clean(&part->binary); FREE(part); } } @@ -852,21 +857,21 @@ static void si_test_vmfault(struct si_screen *sscreen) if (sscreen->b.debug_flags & DBG_TEST_VMFAULT_SHADER) { util_test_constant_buffer(ctx, buf); puts("VM fault test: Shader - done."); } exit(0); } struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) { struct si_screen *sscreen = CALLOC_STRUCT(si_screen); - unsigned num_cpus, num_compiler_threads, i; + unsigned num_threads, num_compiler_threads, num_compiler_threads_lowprio, i; if (!sscreen) { return NULL; } /* Set functions first. 
*/ sscreen->b.b.context_create = si_pipe_create_context; sscreen->b.b.destroy = si_destroy_screen; sscreen->b.b.get_param = si_get_param; sscreen->b.b.get_shader_param = si_get_shader_param; @@ -877,31 +882,47 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) if (!r600_common_screen_init(&sscreen->b, ws) || !si_init_gs_info(sscreen) || !si_init_shader_cache(sscreen)) { FREE(sscreen); return NULL; } /* Only enable as many threads as we have target machines, but at most * the number of CPUs - 1 if there is more than one. */ - num_cpus = sysconf(_SC_NPROCESSORS_ONLN); - num_cpus = MAX2(1, num_cpus - 1); - num_compiler_threads = MIN2(num_cpus, ARRAY_SIZE(sscreen->tm)); + num_threads = sysconf(_SC_NPROCESSORS_ONLN); + num_threads = MAX2(1, num_threads - 1); + num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->tm)); + num_compiler_threads_lowprio = + MIN2(num_threads, ARRAY_SIZE(sscreen->tm_low_priority)); if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader", 32, num_compiler_threads, 0)) { si_destroy_shader_cache(sscreen); FREE(sscreen); return NULL; } + /* The queue must be large enough so that adding optimized shaders + * doesn't stall draw calls when the queue is full. Especially varying + * packing generates a very high volume of optimized shader compilation + * jobs. + */ + if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority, + "si_shader_low", + 1024, num_compiler_threads_lowprio, + UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) { + si_destroy_shader_cache(sscreen); + FREE(sscreen); + return NULL; + } + si_handle_env_var_force_family(sscreen); if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false)) si_init_perfcounters(sscreen); /* Hawaii has a bug with offchip buffers > 256 that can be worked * around by setting 4K granularity. */ sscreen->tess_offchip_block_dw_size = sscreen->b.family == CHIP_HAWAII ? 
4096 : 8192; @@ -951,20 +972,22 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 | SI_CONTEXT_INV_VMEM_L1 | SI_CONTEXT_INV_GLOBAL_L2; sscreen->b.barrier_flags.compute_to_L2 = SI_CONTEXT_CS_PARTIAL_FLUSH; if (debug_get_bool_option("RADEON_DUMP_SHADERS", false)) sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS; for (i = 0; i < num_compiler_threads; i++) sscreen->tm[i] = si_create_llvm_target_machine(sscreen); + for (i = 0; i < num_compiler_threads_lowprio; i++) + sscreen->tm_low_priority[i] = si_create_llvm_target_machine(sscreen); /* Create the auxiliary context. This must be done last. */ sscreen->b.aux_context = si_create_context(&sscreen->b.b, 0); if (sscreen->b.debug_flags & DBG_TEST_DMA) r600_test_dma(&sscreen->b); if (sscreen->b.debug_flags & (DBG_TEST_VMFAULT_CP | DBG_TEST_VMFAULT_SDMA | DBG_TEST_VMFAULT_SHADER)) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 13ec072..e917cb1 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -103,20 +103,23 @@ struct si_screen { * variants of VS and TES are cached, so LS and ES aren't. * - GS and CS aren't cached, but it's certainly possible to cache * those as well. */ mtx_t shader_cache_mutex; struct hash_table *shader_cache; /* Shader compiler queue for multithreaded compilation. 
*/ struct util_queue shader_compiler_queue; LLVMTargetMachineRef tm[4]; /* used by the queue only */ + + struct util_queue shader_compiler_queue_low_priority; + LLVMTargetMachineRef tm_low_priority[4]; }; struct si_blend_color { struct r600_atom atom; struct pipe_blend_color state; }; struct si_sampler_view { struct pipe_sampler_view base; /* [0..7] = image descriptor diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 62bb221..5a22add 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1443,22 +1443,22 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, static void si_build_shader_variant(void *job, int thread_index) { struct si_shader *shader = (struct si_shader *)job; struct si_shader_selector *sel = shader->selector; struct si_screen *sscreen = sel->screen; LLVMTargetMachineRef tm; struct pipe_debug_callback *debug = &shader->compiler_ctx_state.debug; int r; if (thread_index >= 0) { - assert(thread_index < ARRAY_SIZE(sscreen->tm)); - tm = sscreen->tm[thread_index]; + assert(thread_index < ARRAY_SIZE(sscreen->tm_low_priority)); + tm = sscreen->tm_low_priority[thread_index]; if (!debug->async) debug = NULL; } else { tm = shader->compiler_ctx_state.tm; } r = si_shader_create(sscreen, tm, shader, debug); if (unlikely(r)) { R600_ERR("Failed to build shader variant (type=%u) %d\n", sel->type, r); @@ -1672,21 +1672,21 @@ again: } else { sel->last_variant->next_variant = shader; sel->last_variant = shader; } /* If it's an optimized shader, compile it asynchronously. */ if (shader->is_optimized && !is_pure_monolithic && thread_index < 0) { /* Compile it asynchronously. */ - util_queue_add_job(&sscreen->shader_compiler_queue, + util_queue_add_job(&sscreen->shader_compiler_queue_low_priority, shader, &shader->optimized_ready, si_build_shader_variant, NULL); /* Use the default (unoptimized) shader for now. 
*/ memset(&key->opt, 0, sizeof(key->opt)); mtx_unlock(&sel->mutex); goto again; } assert(!shader->is_optimized); @@ -2251,21 +2251,21 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state) sctx->do_update_shaders = true; if (sel && sctx->ia_multi_vgt_param_key.u.uses_tess) si_update_tess_uses_prim_id(sctx); si_mark_atom_dirty(sctx, &sctx->cb_render_state); si_set_active_descriptors_for_shader(sctx, sel); } static void si_delete_shader(struct si_context *sctx, struct si_shader *shader) { if (shader->is_optimized) { - util_queue_drop_job(&sctx->screen->shader_compiler_queue, + util_queue_drop_job(&sctx->screen->shader_compiler_queue_low_priority, &shader->optimized_ready); util_queue_fence_destroy(&shader->optimized_ready); } if (shader->pm4) { switch (shader->selector->type) { case PIPE_SHADER_VERTEX: if (shader->key.as_ls) { assert(sctx->b.chip_class <= VI); si_pm4_delete_state(sctx, ls, shader->pm4); -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev