From: Nicolai Hähnle <nicolai.haeh...@amd.com>
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.c         |  2 +-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.h         |  1 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c     | 15 +++++++--------
 src/gallium/drivers/radeonsi/si_shader.c            | 14 ++++++++------
 src/gallium/drivers/radeonsi/si_shader_internal.h   |  1 +
 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 14 +++++++++-----
 6 files changed, 27 insertions(+), 20 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c index e450092a82c..66f752989ab 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c @@ -306,21 +306,21 @@ lp_build_tgsi_inst_llvm( if (info->output_mode == TGSI_OUTPUT_REPLICATE && bld_base->soa) { val = emit_data.output[0]; memset(emit_data.output, 0, sizeof(emit_data.output)); TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { emit_data.output[chan_index] = val; } } } if (info->num_dst > 0 && info->opcode != TGSI_OPCODE_STORE) { - bld_base->emit_store(bld_base, inst, info, emit_data.output); + bld_base->emit_store(bld_base, inst, info, 0, emit_data.output); } return TRUE; } LLVMValueRef lp_build_emit_fetch_src( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type stype, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index eb632b700ab..eeeea507810 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -363,20 +363,21 @@ struct lp_build_tgsi_context LLVMValueRef, unsigned, unsigned, unsigned, unsigned); void (*emit_debug)(struct lp_build_tgsi_context *, const struct tgsi_full_instruction *, const struct tgsi_opcode_info *); void (*emit_store)(struct lp_build_tgsi_context *, const struct tgsi_full_instruction *, const struct tgsi_opcode_info *, + unsigned index, LLVMValueRef dst[4]); void (*emit_declaration)(struct lp_build_tgsi_context *, const struct tgsi_full_declaration *decl); void (*emit_immediate)(struct lp_build_tgsi_context *, const struct tgsi_full_immediate *imm); /* Allow the user to store data in this structure rather than passing it diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index f16c579f38d..45110e8b9fe 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -1906,33 +1906,32 @@ emit_debug( lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask); } } } static void emit_store( struct lp_build_tgsi_context * bld_base, const struct tgsi_full_instruction * inst, const struct tgsi_opcode_info * info, + unsigned index, LLVMValueRef dst[4]) { - unsigned chan_index; enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode); - if(info->num_dst) { - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { - - if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3)) - continue; - emit_store_chan(bld_base, inst, 0, chan_index, dst[chan_index]); - } + unsigned writemask = inst->Dst[index].Register.WriteMask; + while (writemask) { + unsigned chan_index = u_bit_scan(&writemask); + if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3)) + continue; + emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]); } } static unsigned tgsi_to_pipe_tex_target(unsigned tgsi_target) { switch (tgsi_target) { case TGSI_TEXTURE_BUFFER: return PIPE_BUFFER; case TGSI_TEXTURE_1D: diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c4e7f225a8f..1a1a70e23ea 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1132,39 +1132,40 @@ static LLVMValueRef fetch_input_tes( base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset); addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg); return buffer_load(bld_base, type, swizzle, buffer, base, addr, true); } static void store_output_tcs(struct lp_build_tgsi_context *bld_base, const struct tgsi_full_instruction *inst, const struct tgsi_opcode_info *info, + unsigned index, LLVMValueRef dst[4]) { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = &ctx->gallivm; - const struct tgsi_full_dst_register *reg = &inst->Dst[0]; + const struct 
tgsi_full_dst_register *reg = &inst->Dst[index]; const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info; unsigned chan_index; LLVMValueRef dw_addr, stride; LLVMValueRef buffer, base, buf_addr; LLVMValueRef values[4]; bool skip_lds_store; bool is_tess_factor = false, is_tess_inner = false; /* Only handle per-patch and per-vertex outputs here. * Vectors will be lowered to scalars and this function will be called again. */ if (reg->Register.File != TGSI_FILE_OUTPUT || (dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == LLVMVectorTypeKind)) { - si_llvm_emit_store(bld_base, inst, info, dst); + si_llvm_emit_store(bld_base, inst, info, index, dst); return; } if (reg->Register.Dimension) { stride = get_tcs_out_vertex_dw_stride(ctx); dw_addr = get_tcs_out_current_patch_offset(ctx); dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr); skip_lds_store = !sh_info->reads_pervertex_outputs; } else { dw_addr = get_tcs_out_current_patch_data_offset(ctx); @@ -1184,54 +1185,55 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, is_tess_inner = name == TGSI_SEMANTIC_TESSINNER; } } } buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k); base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset); buf_addr = get_tcs_tes_buffer_address_from_reg(ctx, reg, NULL); - - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { + uint32_t writemask = reg->Register.WriteMask; + while (writemask) { + chan_index = u_bit_scan(&writemask); LLVMValueRef value = dst[chan_index]; if (inst->Instruction.Saturate) value = ac_build_clamp(&ctx->ac, value); /* Skip LDS stores if there is no LDS read of this output. 
*/ if (!skip_lds_store) lds_store(bld_base, chan_index, dw_addr, value); value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, ""); values[chan_index] = value; - if (inst->Dst[0].Register.WriteMask != 0xF && !is_tess_factor) { + if (reg->Register.WriteMask != 0xF && !is_tess_factor) { ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, buf_addr, base, 4 * chan_index, 1, 0, true, false); } /* Write tess factors into VGPRs for the epilog. */ if (is_tess_factor && ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) { if (!is_tess_inner) { LLVMBuildStore(gallivm->builder, value, /* outer */ ctx->invoc0_tess_factors[chan_index]); } else if (chan_index < 2) { LLVMBuildStore(gallivm->builder, value, /* inner */ ctx->invoc0_tess_factors[4 + chan_index]); } } } - if (inst->Dst[0].Register.WriteMask == 0xF && !is_tess_factor) { + if (reg->Register.WriteMask == 0xF && !is_tess_factor) { LLVMValueRef value = lp_build_gather_values(gallivm, values, 4); ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr, base, 0, 1, 0, true, false); } } static LLVMValueRef fetch_input_gs( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 023f9a6a093..141dd34be3d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -282,20 +282,21 @@ LLVMValueRef si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base, LLVMValueRef ptr2); LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *reg, enum tgsi_opcode_type type, unsigned swizzle); void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base, const struct tgsi_full_instruction *inst, const struct tgsi_opcode_info *info, + unsigned index, LLVMValueRef dst[4]); /* Combine these with & instead of |. 
*/ #define NOOP_WAITCNT 0xf7f #define LGKM_CNT 0x07f #define VM_CNT 0xf70 void si_emit_waitcnt(struct si_shader_context *ctx, unsigned simm16); LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx, diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c index 231f16f049d..67172729bb6 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c @@ -874,48 +874,52 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base, break; default: break; } } void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base, const struct tgsi_full_instruction *inst, const struct tgsi_opcode_info *info, + unsigned index, LLVMValueRef dst[4]) { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = &ctx->gallivm; - const struct tgsi_full_dst_register *reg = &inst->Dst[0]; + const struct tgsi_full_dst_register *reg = &inst->Dst[index]; LLVMBuilderRef builder = ctx->gallivm.builder; LLVMValueRef temp_ptr, temp_ptr2 = NULL; - unsigned chan, chan_index; bool is_vec_store = false; enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode); if (dst[0]) { LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0])); is_vec_store = (k == LLVMVectorTypeKind); } if (is_vec_store) { LLVMValueRef values[4] = {}; - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) { + uint32_t writemask = reg->Register.WriteMask; + while (writemask) { + unsigned chan = u_bit_scan(&writemask); LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0); values[chan] = LLVMBuildExtractElement(gallivm->builder, dst[0], index, ""); } - bld_base->emit_store(bld_base, inst, info, values); + bld_base->emit_store(bld_base, inst, info, index, values); return; } - TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { + uint32_t writemask = reg->Register.WriteMask; + while (writemask) { + unsigned chan_index = 
u_bit_scan(&writemask); LLVMValueRef value = dst[chan_index]; if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3)) continue; if (inst->Instruction.Saturate) value = ac_build_clamp(&ctx->ac, value); if (reg->Register.File == TGSI_FILE_ADDRESS) { temp_ptr = ctx->addrs[reg->Register.Index][chan_index]; LLVMBuildStore(builder, value, temp_ptr); -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev