From: Marek Olšák <marek.ol...@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c | 87 +++++++++++++++++++++++++-------
 1 file changed, 70 insertions(+), 17 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index a4c2ac0..392f85d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -7368,20 +7368,28 @@ static void si_count_scratch_private_memory(struct si_shader_context *ctx) LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst)); /* No idea why LLVM aligns allocas to 4 elements. */ unsigned alignment = LLVMGetAlignment(inst); unsigned dw_size = align(llvm_get_type_size(type) / 4, alignment); ctx->shader->config.private_mem_vgprs += dw_size; } bb = LLVMGetNextBasicBlock(bb); } } +static void si_init_exec_full_mask(struct si_shader_context *ctx) +{ + LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0); + lp_build_intrinsic(ctx->gallivm.builder, + "llvm.amdgcn.init.exec", ctx->voidt, + &full_mask, 1, LP_FUNC_ATTR_CONVERGENT); +} + static void si_init_exec_from_input(struct si_shader_context *ctx, unsigned param, unsigned bitoffset) { LLVMValueRef args[] = { LLVMGetParam(ctx->main_fn, param), LLVMConstInt(ctx->i32, bitoffset, 0), }; lp_build_intrinsic(ctx->gallivm.builder, "llvm.amdgcn.init.exec.from.input", ctx->voidt, args, 2, LP_FUNC_ATTR_CONVERGENT); @@ -7681,79 +7689,128 @@ static void si_get_ps_epilog_key(struct si_shader *shader, key->ps_epilog.states = shader->key.part.ps.epilog; } /** * Build the GS prolog function. Rotate the input vertices for triangle strips * with adjacency. 
*/ static void si_build_gs_prolog_function(struct si_shader_context *ctx, union si_shader_part_key *key) { - const unsigned num_sgprs = GFX6_GS_NUM_USER_SGPR + 2; - const unsigned num_vgprs = 8; + unsigned num_sgprs, num_vgprs; struct gallivm_state *gallivm = &ctx->gallivm; LLVMBuilderRef builder = gallivm->builder; - LLVMTypeRef params[32]; - LLVMTypeRef returns[32]; + LLVMTypeRef params[48]; /* 40 SGPRs (maximum) + some VGPRs */ + LLVMTypeRef returns[48]; LLVMValueRef func, ret; + if (ctx->screen->b.chip_class >= GFX9) { + num_sgprs = 8 + GFX9_GS_NUM_USER_SGPR; + num_vgprs = 5; /* ES inputs are not needed by GS */ + } else { + num_sgprs = GFX6_GS_NUM_USER_SGPR + 2; + num_vgprs = 8; + } + for (unsigned i = 0; i < num_sgprs; ++i) { params[i] = ctx->i32; returns[i] = ctx->i32; } for (unsigned i = 0; i < num_vgprs; ++i) { params[num_sgprs + i] = ctx->i32; returns[num_sgprs + i] = ctx->f32; } /* Create the function. */ si_create_function(ctx, "gs_prolog", returns, num_sgprs + num_vgprs, params, num_sgprs + num_vgprs, num_sgprs - 1); func = ctx->main_fn; + /* Set the full EXEC mask for the prolog, because we are only fiddling + * with registers here. The main shader part will set the correct EXEC + * mask. + */ + if (ctx->screen->b.chip_class >= GFX9) + si_init_exec_full_mask(ctx); + /* Copy inputs to outputs. This should be no-op, as the registers match, * but it will prevent the compiler from overwriting them unintentionally. */ ret = ctx->return_value; for (unsigned i = 0; i < num_sgprs; i++) { LLVMValueRef p = LLVMGetParam(func, i); ret = LLVMBuildInsertValue(builder, ret, p, i, ""); } for (unsigned i = 0; i < num_vgprs; i++) { LLVMValueRef p = LLVMGetParam(func, num_sgprs + i); p = LLVMBuildBitCast(builder, p, ctx->f32, ""); ret = LLVMBuildInsertValue(builder, ret, p, num_sgprs + i, ""); } if (key->gs_prolog.states.tri_strip_adj_fix) { /* Remap the input vertices for every other primitive. 
*/ - const unsigned vtx_params[6] = { + const unsigned gfx6_vtx_params[6] = { num_sgprs, num_sgprs + 1, num_sgprs + 3, num_sgprs + 4, num_sgprs + 5, num_sgprs + 6 }; + const unsigned gfx9_vtx_params[3] = { + num_sgprs, + num_sgprs + 1, + num_sgprs + 4, + }; + LLVMValueRef vtx_in[6], vtx_out[6]; LLVMValueRef prim_id, rotate; + if (ctx->screen->b.chip_class >= GFX9) { + for (unsigned i = 0; i < 3; i++) { + vtx_in[i*2] = unpack_param(ctx, gfx9_vtx_params[i], 0, 16); + vtx_in[i*2+1] = unpack_param(ctx, gfx9_vtx_params[i], 16, 16); + } + } else { + for (unsigned i = 0; i < 6; i++) + vtx_in[i] = LLVMGetParam(func, gfx6_vtx_params[i]); + } + prim_id = LLVMGetParam(func, num_sgprs + 2); rotate = LLVMBuildTrunc(builder, prim_id, ctx->i1, ""); for (unsigned i = 0; i < 6; ++i) { - LLVMValueRef base, rotated, actual; - base = LLVMGetParam(func, vtx_params[i]); - rotated = LLVMGetParam(func, vtx_params[(i + 4) % 6]); - actual = LLVMBuildSelect(builder, rotate, rotated, base, ""); - actual = LLVMBuildBitCast(builder, actual, ctx->f32, ""); - ret = LLVMBuildInsertValue(builder, ret, actual, vtx_params[i], ""); + LLVMValueRef base, rotated; + base = vtx_in[i]; + rotated = vtx_in[(i + 4) % 6]; + vtx_out[i] = LLVMBuildSelect(builder, rotate, rotated, base, ""); + } + + if (ctx->screen->b.chip_class >= GFX9) { + for (unsigned i = 0; i < 3; i++) { + LLVMValueRef hi, out; + + hi = LLVMBuildShl(builder, vtx_out[i*2+1], + LLVMConstInt(ctx->i32, 16, 0), ""); + out = LLVMBuildOr(builder, vtx_out[i*2], hi, ""); + out = LLVMBuildBitCast(builder, out, ctx->f32, ""); + ret = LLVMBuildInsertValue(builder, ret, out, + gfx9_vtx_params[i], ""); + } + } else { + for (unsigned i = 0; i < 6; i++) { + LLVMValueRef out; + + out = LLVMBuildBitCast(builder, vtx_out[i], ctx->f32, ""); + ret = LLVMBuildInsertValue(builder, ret, out, + gfx6_vtx_params[i], ""); + } } } LLVMBuildRet(builder, ret); } /** * Given a list of shader part functions, build a wrapper function that * runs them in sequence to form a 
monolithic shader. */ @@ -7821,26 +7878,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx, assert(ac_is_sgpr_param(param) == (gprs < num_sgprs)); assert(gprs + size <= num_sgprs + num_vgprs && (gprs >= num_sgprs || gprs + size <= num_sgprs)); gprs += size; } si_create_function(ctx, "wrapper", NULL, 0, param_types, num_params, last_sgpr_param); - if (is_merged_shader(ctx->shader)) { - LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0); - lp_build_intrinsic(ctx->gallivm.builder, - "llvm.amdgcn.init.exec", ctx->voidt, - &full_mask, 1, LP_FUNC_ATTR_CONVERGENT); - } + if (is_merged_shader(ctx->shader)) + si_init_exec_full_mask(ctx); /* Record the arguments of the function as if they were an output of * a previous part. */ num_out = 0; num_out_sgpr = 0; for (unsigned i = 0; i < num_params; ++i) { LLVMValueRef param = LLVMGetParam(ctx->main_fn, i); LLVMTypeRef param_type = LLVMTypeOf(param); -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev