Patches 2 - 4: Reviewed-by: Marek Olšák <marek.ol...@amd.com>
Marek On Fri, Nov 10, 2017 at 4:13 AM, Timothy Arceri <tarc...@itsqueeze.com> wrote: > --- > src/amd/common/ac_nir_to_llvm.c | 11 +++++----- > src/amd/common/ac_shader_abi.h | 4 ++++ > src/gallium/drivers/radeonsi/si_shader.c | 35 > +++++++++++++++++++------------- > 3 files changed, 31 insertions(+), 19 deletions(-) > > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index 2ae656693f..36f471dcc7 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -3902,46 +3902,45 @@ static LLVMValueRef visit_interp(struct > nir_to_llvm_context *ctx, > > LLVMConstInt(ctx->ac.i32, 2, false), > llvm_chan, > attr_number, > ctx->prim_mask); > } > } > return build_varying_gather_values(&ctx->ac, result, > instr->num_components, > > instr->variables[0]->var->data.location_frac); > } > > static void > -visit_emit_vertex(struct nir_to_llvm_context *ctx, > - const nir_intrinsic_instr *instr) > +visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef > *addrs) > { > LLVMValueRef gs_next_vertex; > LLVMValueRef can_emit; > int idx; > + struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); > > - assert(instr->const_index[0] == 0); > /* Write vertex attribute values to GSVS ring */ > gs_next_vertex = LLVMBuildLoad(ctx->builder, > ctx->gs_next_vertex, > ""); > > /* If this thread has already emitted the declared maximum number of > * vertices, kill it: excessive vertex emissions are not supposed to > * have any effect, and GS threads have no externally observable > * effects other than emitting vertices. > */ > can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex, > LLVMConstInt(ctx->ac.i32, > ctx->gs_max_out_vertices, false), ""); > ac_build_kill_if_false(&ctx->ac, can_emit); > > /* loop num outputs */ > idx = 0; > for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { > - LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4]; > + LLVMValueRef *out_ptr = &addrs[i * 4]; > int length = 4; > int slot = idx; > int slot_inc = 1; > > if (!(ctx->output_mask & (1ull << i))) > continue; > > if (i == VARYING_SLOT_CLIP_DIST0) { > /* pack clip and cull into a single set of slots */ > length = ctx->num_output_clips + > ctx->num_output_culls; > @@ -4160,21 +4159,22 @@ static void visit_intrinsic(struct ac_nir_context > *ctx, > case nir_intrinsic_var_atomic_exchange: > case nir_intrinsic_var_atomic_comp_swap: > result = visit_var_atomic(ctx->nctx, instr); > break; > case nir_intrinsic_interp_var_at_centroid: > case nir_intrinsic_interp_var_at_sample: > case nir_intrinsic_interp_var_at_offset: > result = visit_interp(ctx->nctx, instr); > break; > case nir_intrinsic_emit_vertex: > - visit_emit_vertex(ctx->nctx, instr); > + assert(instr->const_index[0] == 0); > + ctx->abi->emit_vertex(ctx->abi, 0, ctx->outputs); > break; > case nir_intrinsic_end_primitive: > visit_end_primitive(ctx->nctx, instr); > break; > case nir_intrinsic_load_tess_coord: > result = visit_load_tess_coord(ctx->nctx, instr); > break; > case nir_intrinsic_load_patch_vertices_in: > result = LLVMConstInt(ctx->ac.i32, > ctx->nctx->options->key.tcs.input_vertices, false); > break; > @@ -6490,20 +6490,21 @@ LLVMModuleRef > ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, > ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size, > > ac_nir_get_max_workgroup_size(ctx.options->chip_class, > > shaders[i])); > } > > create_function(&ctx, shaders[shader_count - 1]->info.stage, > shader_count >= 2, > shader_count >= 2 ? shaders[shader_count - > 2]->info.stage : MESA_SHADER_VERTEX); > > ctx.abi.inputs = &ctx.inputs[0]; > ctx.abi.emit_outputs = handle_shader_outputs_post; > + ctx.abi.emit_vertex = visit_emit_vertex; > ctx.abi.load_ssbo = radv_load_ssbo; > ctx.abi.load_sampler_desc = radv_get_sampler_desc; > ctx.abi.clamp_shadow_reference = false; > > if (shader_count >= 2) > ac_init_exec_full_mask(&ctx.ac); > > if (ctx.ac.chip_class == GFX9 && > shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL) > ac_nir_fixup_ls_hs_input_vgprs(&ctx); > diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h > index 14517d5570..27586d0212 100644 > --- a/src/amd/common/ac_shader_abi.h > +++ b/src/amd/common/ac_shader_abi.h > @@ -51,20 +51,24 @@ struct ac_shader_abi { > * > * Currently only used for NIR shaders; indexed by variables' > * driver_location. > */ > LLVMValueRef *inputs; > > void (*emit_outputs)(struct ac_shader_abi *abi, > unsigned max_outputs, > LLVMValueRef *addrs); > > + void (*emit_vertex)(struct ac_shader_abi *abi, > + unsigned stream, > + LLVMValueRef *addrs); > + > LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef > index); > > /** > * Load the descriptor for the given buffer. > * > * \param buffer the buffer as presented in NIR: this is the > descriptor > * in Vulkan, and the buffer index in OpenGL/Gallium > * \param write whether buffer contents will be written > */ > LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi, > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index d234e08071..47ca64fdea 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -4031,39 +4031,35 @@ static unsigned si_llvm_get_stream(struct > lp_build_tgsi_context *bld_base, > unsigned stream; > > assert(src0.File == TGSI_FILE_IMMEDIATE); > > imm = ctx->imms[src0.Index * TGSI_NUM_CHANNELS + src0.SwizzleX]; > stream = LLVMConstIntGetZExtValue(imm) & 0x3; > return stream; > } > > /* Emit one vertex from the geometry shader */ > -static void si_llvm_emit_vertex( > - const struct lp_build_tgsi_action *action, > - struct lp_build_tgsi_context *bld_base, > - struct lp_build_emit_data *emit_data) > +static void si_llvm_emit_vertex(struct ac_shader_abi *abi, > + unsigned stream, > + LLVMValueRef *addrs) > { > - struct si_shader_context *ctx = si_shader_context(bld_base); > - struct lp_build_context *uint = &bld_base->uint_bld; > + struct si_shader_context *ctx = si_shader_context_from_abi(abi); > + struct tgsi_shader_info *info = &ctx->shader->selector->info; > + struct lp_build_context *uint = &ctx->bld_base.uint_bld; > struct si_shader *shader = ctx->shader; > - struct tgsi_shader_info *info = &shader->selector->info; > struct lp_build_if_state if_state; > LLVMValueRef soffset = LLVMGetParam(ctx->main_fn, > ctx->param_gs2vs_offset); > LLVMValueRef gs_next_vertex; > LLVMValueRef can_emit; > unsigned chan, offset; > int i; > - unsigned stream; > - > - stream = si_llvm_get_stream(bld_base, emit_data); > > /* Write vertex attribute values to GSVS ring */ > gs_next_vertex = LLVMBuildLoad(ctx->ac.builder, > ctx->gs_next_vertex[stream], > ""); > > /* If this thread has already emitted the declared maximum number of > * vertices, skip the write: excessive vertex emissions are not > * supposed to have any effect. > * > @@ -4077,28 +4073,26 @@ static void si_llvm_emit_vertex( > > bool use_kill = !info->writes_memory; > if (use_kill) { > ac_build_kill_if_false(&ctx->ac, can_emit); > } else { > lp_build_if(&if_state, &ctx->gallivm, can_emit); > } > > offset = 0; > for (i = 0; i < info->num_outputs; i++) { > - LLVMValueRef *out_ptr = ctx->outputs[i]; > - > for (chan = 0; chan < 4; chan++) { > if (!(info->output_usagemask[i] & (1 << chan)) || > ((info->output_streams[i] >> (2 * chan)) & 3) != > stream) > continue; > > - LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, > out_ptr[chan], ""); > + LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, > addrs[4 * i + chan], ""); > LLVMValueRef voffset = > LLVMConstInt(ctx->i32, offset * > > shader->selector->gs_max_out_vertices, 0); > offset++; > > voffset = lp_build_add(uint, voffset, gs_next_vertex); > voffset = lp_build_mul_imm(uint, voffset, 4); > > out_val = ac_to_integer(&ctx->ac, out_val); > > @@ -4115,20 +4109,32 @@ static void si_llvm_emit_vertex( > > LLVMBuildStore(ctx->ac.builder, gs_next_vertex, > ctx->gs_next_vertex[stream]); > > /* Signal vertex emission */ > ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | > (stream << 8), > si_get_gs_wave_id(ctx)); > if (!use_kill) > lp_build_endif(&if_state); > } > > +/* Emit one vertex from the geometry shader */ > +static void si_tgsi_emit_vertex( > + const struct lp_build_tgsi_action *action, > + struct lp_build_tgsi_context *bld_base, > + struct lp_build_emit_data *emit_data) > +{ > + struct si_shader_context *ctx = si_shader_context(bld_base); > + unsigned stream = si_llvm_get_stream(bld_base, emit_data); > + > + si_llvm_emit_vertex(&ctx->abi, stream, ctx->outputs[0]); > +} > + > /* Cut one primitive from the geometry shader */ > static void si_llvm_emit_primitive( > const struct lp_build_tgsi_action *action, > struct lp_build_tgsi_context *bld_base, > struct lp_build_emit_data *emit_data) > { > struct si_shader_context *ctx = si_shader_context(bld_base); > unsigned stream; > > /* Signal primitive cut */ > @@ -5634,21 +5640,21 @@ static void si_init_shader_ctx(struct > si_shader_context *ctx, > bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit; > bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit; > bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit; > bld_base->op_actions[TGSI_OPCODE_BALLOT].emit = ballot_emit; > bld_base->op_actions[TGSI_OPCODE_READ_FIRST].intr_name = > "llvm.amdgcn.readfirstlane"; > bld_base->op_actions[TGSI_OPCODE_READ_FIRST].emit = read_lane_emit; > bld_base->op_actions[TGSI_OPCODE_READ_INVOC].intr_name = > "llvm.amdgcn.readlane"; > bld_base->op_actions[TGSI_OPCODE_READ_INVOC].fetch_args = > read_invoc_fetch_args; > bld_base->op_actions[TGSI_OPCODE_READ_INVOC].emit = read_lane_emit; > > - bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex; > + bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_tgsi_emit_vertex; > bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = > si_llvm_emit_primitive; > bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier; > } > > static void si_optimize_vs_outputs(struct si_shader_context *ctx) > { > struct si_shader *shader = ctx->shader; > struct tgsi_shader_info *info = &shader->selector->info; > > if ((ctx->type != PIPE_SHADER_VERTEX && > @@ -5748,20 +5754,21 @@ static bool si_compile_tgsi_main(struct > si_shader_context *ctx, > bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes; > if (shader->key.as_es) > bld_base->emit_epilogue = si_llvm_emit_es_epilogue; > else { > ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue; > bld_base->emit_epilogue = si_tgsi_emit_epilogue; > } > break; > case PIPE_SHADER_GEOMETRY: > bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs; > + ctx->abi.emit_vertex = si_llvm_emit_vertex; > bld_base->emit_epilogue = si_llvm_emit_gs_epilogue; > break; > case PIPE_SHADER_FRAGMENT: > ctx->load_input = declare_input_fs; > ctx->abi.emit_outputs = si_llvm_return_fs_outputs; > bld_base->emit_epilogue = si_tgsi_emit_epilogue; > break; > case PIPE_SHADER_COMPUTE: > break; > default: > -- > 2.14.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev