From: Marek Olšák <marek.ol...@amd.com> On APUs predating Raven, compile compute shaders that contain more than 1000 memory instructions with a second, less-optimizing LLVM target machine (-O1 instead of -O2). This makes dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23 finish sooner on older CPUs; otherwise the test gets killed before it completes and is counted as a failure. --- src/amd/common/ac_llvm_util.c | 18 ++++++++++-- src/amd/common/ac_llvm_util.h | 11 ++++++- src/gallium/drivers/radeonsi/si_pipe.c | 12 +++++++- src/gallium/drivers/radeonsi/si_shader.c | 29 +++++++++++++++---- .../drivers/radeonsi/si_shader_internal.h | 3 +- .../drivers/radeonsi/si_shader_tgsi_setup.c | 8 +++-- 6 files changed, 68 insertions(+), 13 deletions(-)
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index 0c8dbf1ec51..b6960f7382d 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -130,20 +130,21 @@ const char *ac_get_llvm_processor_name(enum radeon_family family) return HAVE_LLVM >= 0x0700 ? "gfx904" : "gfx902"; case CHIP_VEGA20: return HAVE_LLVM >= 0x0700 ? "gfx906" : "gfx902"; default: return ""; } } static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, enum ac_target_machine_options tm_options, + LLVMCodeGenOptLevel level, const char **out_triple) { assert(family >= CHIP_TAHITI); char features[256]; const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--"; LLVMTargetRef target = ac_get_llvm_target(triple); bool barrier_does_waitcnt = family != CHIP_VEGA20; snprintf(features, sizeof(features), "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s%s", @@ -151,21 +152,21 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "", tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "", tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "", barrier_does_waitcnt ? 
",+auto-waitcnt-before-barrier" : ""); LLVMTargetMachineRef tm = LLVMCreateTargetMachine( target, triple, ac_get_llvm_processor_name(family), features, - LLVMCodeGenLevelDefault, + level, LLVMRelocDefault, LLVMCodeModelDefault); if (out_triple) *out_triple = triple; return tm; } static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info, bool check_ir) @@ -294,25 +295,34 @@ ac_count_scratch_private_memory(LLVMValueRef function) bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, bool okay_to_leak_target_library_info, enum radeon_family family, enum ac_target_machine_options tm_options) { const char *triple; memset(compiler, 0, sizeof(*compiler)); - compiler->tm = ac_create_target_machine(family, - tm_options, &triple); + compiler->tm = ac_create_target_machine(family, tm_options, + LLVMCodeGenLevelDefault, + &triple); if (!compiler->tm) return false; + if (tm_options & AC_TM_CREATE_LOW_OPT) { + compiler->low_opt_tm = + ac_create_target_machine(family, tm_options, + LLVMCodeGenLevelLess, NULL); + if (!compiler->low_opt_tm) + goto fail; + } + if (okay_to_leak_target_library_info || (HAVE_LLVM >= 0x0700)) { compiler->target_library_info = ac_create_target_library_info(triple); if (!compiler->target_library_info) goto fail; } compiler->passmgr = ac_create_passmgr(compiler->target_library_info, tm_options & AC_TM_CHECK_IR); if (!compiler->passmgr) @@ -327,13 +337,15 @@ fail: void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler) { if (compiler->passmgr) LLVMDisposePassManager(compiler->passmgr); #if HAVE_LLVM >= 0x0700 /* This crashes on LLVM 5.0 and 6.0 and Ubuntu 18.04, so leak it there. 
*/ if (compiler->target_library_info) ac_dispose_target_library_info(compiler->target_library_info); #endif + if (compiler->low_opt_tm) + LLVMDisposeTargetMachine(compiler->low_opt_tm); if (compiler->tm) LLVMDisposeTargetMachine(compiler->tm); } diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h index e5b93037d26..c0e759b8836 100644 --- a/src/amd/common/ac_llvm_util.h +++ b/src/amd/common/ac_llvm_util.h @@ -56,34 +56,43 @@ enum ac_func_attr { AC_FUNC_ATTR_LEGACY = (1u << 31), }; enum ac_target_machine_options { AC_TM_SUPPORTS_SPILL = (1 << 0), AC_TM_SISCHED = (1 << 1), AC_TM_FORCE_ENABLE_XNACK = (1 << 2), AC_TM_FORCE_DISABLE_XNACK = (1 << 3), AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 4), AC_TM_CHECK_IR = (1 << 5), + AC_TM_CREATE_LOW_OPT = (1 << 6), }; enum ac_float_mode { AC_FLOAT_MODE_DEFAULT, AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH, AC_FLOAT_MODE_UNSAFE_FP_MATH, }; /* Per-thread persistent LLVM objects. */ struct ac_llvm_compiler { - LLVMTargetMachineRef tm; LLVMTargetLibraryInfoRef target_library_info; LLVMPassManagerRef passmgr; + + /* Default compiler. */ + LLVMTargetMachineRef tm; struct ac_compiler_passes *passes; + + /* Optional compiler for faster compilation with fewer optimizations. + * LLVM modules can be created with "tm" too. There is no difference. 
+ */ + LLVMTargetMachineRef low_opt_tm; /* uses -O1 instead of -O2 */ + struct ac_compiler_passes *low_opt_passes; }; const char *ac_get_llvm_processor_name(enum radeon_family family); void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes); bool ac_is_sgpr_param(LLVMValueRef param); void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx, enum ac_func_attr attr); void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask); void ac_dump_module(LLVMModuleRef module); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 22e333aec77..4f00eb5c2e2 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -100,35 +100,45 @@ static const struct debug_named_value debug_options[] = { { "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." }, { "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." }, { "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." }, DEBUG_NAMED_VALUE_END /* must be last */ }; static void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compiler) { + /* Only create the less-optimizing version of the compiler on APUs + * predating Ryzen (Raven). */ + bool create_low_opt_compiler = !sscreen->info.has_dedicated_vram && + sscreen->info.chip_class <= VI; + enum ac_target_machine_options tm_options = (sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) | (sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 0) | (sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 0) | (!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0) | - (sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0); + (sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0) | + (create_low_opt_compiler ? 
AC_TM_CREATE_LOW_OPT : 0); ac_init_llvm_once(); ac_init_llvm_compiler(compiler, true, sscreen->info.family, tm_options); compiler->passes = ac_create_llvm_passes(compiler->tm); + + if (compiler->low_opt_tm) + compiler->low_opt_passes = ac_create_llvm_passes(compiler->low_opt_tm); } static void si_destroy_compiler(struct ac_llvm_compiler *compiler) { ac_destroy_llvm_passes(compiler->passes); + ac_destroy_llvm_passes(compiler->low_opt_passes); ac_destroy_llvm_compiler(compiler); } /* * pipe_context */ static void si_destroy_context(struct pipe_context *context) { struct si_context *sctx = (struct si_context *)context; int i; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 43ba23ff494..405833d3ba7 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5638,21 +5638,22 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader, check_debug_option); } static int si_compile_llvm(struct si_screen *sscreen, struct ac_shader_binary *binary, struct si_shader_config *conf, struct ac_llvm_compiler *compiler, LLVMModuleRef mod, struct pipe_debug_callback *debug, unsigned processor, - const char *name) + const char *name, + bool less_optimized) { int r = 0; unsigned count = p_atomic_inc_return(&sscreen->num_compilations); if (si_can_dump_shader(sscreen, processor)) { fprintf(stderr, "radeonsi: Compiling shader %d\n", count); if (!(sscreen->debug_flags & (DBG(NO_IR) | DBG(PREOPT_IR)))) { fprintf(stderr, "%s LLVM IR:\n\n", name); ac_dump_module(mod); @@ -5660,21 +5661,22 @@ static int si_compile_llvm(struct si_screen *sscreen, } } if (sscreen->record_llvm_ir) { char *ir = LLVMPrintModuleToString(mod); binary->llvm_ir_string = strdup(ir); LLVMDisposeMessage(ir); } if (!si_replace_shader(count, binary)) { - r = si_llvm_compile(mod, binary, compiler, debug); + r = si_llvm_compile(mod, binary, compiler, debug, + less_optimized); if (r) return r; } 
si_shader_binary_read_config(binary, conf, 0); /* Enable 64-bit and 16-bit denormals, because there is no performance * cost. * * If denormals are enabled, all floating-point output modifiers are @@ -5877,21 +5879,21 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, LLVMBuildRetVoid(ctx.ac.builder); ctx.type = PIPE_SHADER_GEOMETRY; /* override for shader dumping */ si_llvm_optimize_module(&ctx); r = si_compile_llvm(sscreen, &ctx.shader->binary, &ctx.shader->config, ctx.compiler, ctx.ac.module, debug, PIPE_SHADER_GEOMETRY, - "GS Copy Shader"); + "GS Copy Shader", false); if (!r) { if (si_can_dump_shader(sscreen, PIPE_SHADER_GEOMETRY)) fprintf(stderr, "GS Copy Shader:\n"); si_shader_dump(sscreen, ctx.shader, debug, PIPE_SHADER_GEOMETRY, stderr, true); r = si_shader_binary_upload(sscreen, ctx.shader); } si_llvm_dispose(&ctx); @@ -6783,20 +6785,36 @@ static void si_build_wrapper_function(struct si_shader_context *ctx, assert(num_out_sgpr + 1 == num_out); num_out_sgpr = num_out; } } } } LLVMBuildRetVoid(builder); } +static bool si_should_optimize_less(struct ac_llvm_compiler *compiler, + struct si_shader_selector *sel) +{ + if (!compiler->low_opt_passes) + return false; + + /* Assume a slow CPU. */ + assert(!sel->screen->info.has_dedicated_vram && + sel->screen->info.chip_class <= VI); + + /* For a crazy dEQP test containing 2597 memory opcodes, mostly + * buffer stores. 
*/ + return sel->type == PIPE_SHADER_COMPUTE && + sel->info.num_memory_instructions > 1000; +} + int si_compile_tgsi_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler, struct si_shader *shader, struct pipe_debug_callback *debug) { struct si_shader_selector *sel = shader->selector; struct si_shader_context ctx; int r = -1; /* Dump TGSI code before doing TGSI->LLVM conversion in case the @@ -7015,21 +7033,22 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, ctx.shader->config.private_mem_vgprs = ac_count_scratch_private_memory(ctx.main_fn); } /* Make sure the input is a pointer and not integer followed by inttoptr. */ assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn, 0))) == LLVMPointerTypeKind); /* Compile to bytecode. */ r = si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler, - ctx.ac.module, debug, ctx.type, "TGSI shader"); + ctx.ac.module, debug, ctx.type, "TGSI shader", + si_should_optimize_less(compiler, shader->selector)); si_llvm_dispose(&ctx); if (r) { fprintf(stderr, "LLVM failed to compile shader\n"); return r; } /* Validate SGPR and VGPR usage for compute to detect compiler bugs. * LLVM 3.9svn has this bug. */ if (sel->type == PIPE_SHADER_COMPUTE) { @@ -7182,21 +7201,21 @@ si_get_shader_part(struct si_screen *sscreen, default: unreachable("bad shader part"); } build(&ctx, key); /* Compile. 
*/ si_llvm_optimize_module(&ctx); if (si_compile_llvm(sscreen, &result->binary, &result->config, compiler, - ctx.ac.module, debug, ctx.type, name)) { + ctx.ac.module, debug, ctx.type, name, false)) { FREE(result); result = NULL; goto out; } result->next = *list; *list = result; out: si_llvm_dispose(&ctx); diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 21e325c2d82..36351391d95 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -210,21 +210,22 @@ si_shader_context(struct lp_build_tgsi_context *bld_base) static inline struct si_shader_context * si_shader_context_from_abi(struct ac_shader_abi *abi) { struct si_shader_context *ctx = NULL; return container_of(abi, ctx, abi); } unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary, struct ac_llvm_compiler *compiler, - struct pipe_debug_callback *debug); + struct pipe_debug_callback *debug, + bool less_optimized); LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base, enum tgsi_opcode_type type); LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base, enum tgsi_opcode_type type, LLVMValueRef value); LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, LLVMValueRef index, unsigned num); diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index b486be25749..b9ed0fc3ab0 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -75,35 +75,39 @@ static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context) LLVMDisposeMessage(description); } /** * Compile an LLVM module to machine code. 
* * @returns 0 for success, 1 for failure */ unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary, struct ac_llvm_compiler *compiler, - struct pipe_debug_callback *debug) + struct pipe_debug_callback *debug, + bool less_optimized) { + struct ac_compiler_passes *passes = + less_optimized && compiler->low_opt_passes ? + compiler->low_opt_passes : compiler->passes; struct si_llvm_diagnostics diag; LLVMContextRef llvm_ctx; diag.debug = debug; diag.retval = 0; /* Setup Diagnostic Handler*/ llvm_ctx = LLVMGetModuleContext(M); LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag); /* Compile IR. */ - if (!ac_compile_module_to_binary(compiler->passes, M, binary)) + if (!ac_compile_module_to_binary(passes, M, binary)) diag.retval = 1; if (diag.retval != 0) pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed"); return diag.retval; } LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base, enum tgsi_opcode_type type) { -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev