From: Nicolai Hähnle <nicolai.haeh...@amd.com>

---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.c         |  2 +-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.h         |  1 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c     | 15 +++++++--------
 src/gallium/drivers/radeonsi/si_shader.c            | 14 ++++++++------
 src/gallium/drivers/radeonsi/si_shader_internal.h   |  1 +
 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 14 +++++++++-----
 6 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index e450092a82c..66f752989ab 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -306,21 +306,21 @@ lp_build_tgsi_inst_llvm(
       if (info->output_mode == TGSI_OUTPUT_REPLICATE && bld_base->soa) {
          val = emit_data.output[0];
          memset(emit_data.output, 0, sizeof(emit_data.output));
          TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
             emit_data.output[chan_index] = val;
          }
       }
    }
 
    if (info->num_dst > 0 && info->opcode != TGSI_OPCODE_STORE) {
-      bld_base->emit_store(bld_base, inst, info, emit_data.output);
+      bld_base->emit_store(bld_base, inst, info, 0, emit_data.output);
    }
    return TRUE;
 }
 
 
 LLVMValueRef
 lp_build_emit_fetch_src(
    struct lp_build_tgsi_context *bld_base,
    const struct tgsi_full_src_register *reg,
    enum tgsi_opcode_type stype,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index eb632b700ab..eeeea507810 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -363,20 +363,21 @@ struct lp_build_tgsi_context
                          LLVMValueRef, unsigned, unsigned, unsigned, unsigned);
 
 
    void (*emit_debug)(struct lp_build_tgsi_context *,
                       const struct tgsi_full_instruction *,
                       const struct tgsi_opcode_info *);
 
    void (*emit_store)(struct lp_build_tgsi_context *,
                       const struct tgsi_full_instruction *,
                       const struct tgsi_opcode_info *,
+                      unsigned index,
                       LLVMValueRef dst[4]);
 
    void (*emit_declaration)(struct lp_build_tgsi_context *,
                              const struct tgsi_full_declaration *decl);
 
    void (*emit_immediate)(struct lp_build_tgsi_context *,
                           const struct tgsi_full_immediate *imm);
 
 
    /* Allow the user to store data in this structure rather than passing it
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index f16c579f38d..45110e8b9fe 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -1906,33 +1906,32 @@ emit_debug(
          lp_build_print_value(gallivm, "    mask = ", 
bld->exec_mask.exec_mask);
       }
    }
 }
 
 static void
 emit_store(
    struct lp_build_tgsi_context * bld_base,
    const struct tgsi_full_instruction * inst,
    const struct tgsi_opcode_info * info,
+   unsigned index,
    LLVMValueRef dst[4])
 
 {
-   unsigned chan_index;
    enum tgsi_opcode_type dtype = 
tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
 
-   if(info->num_dst) {
-      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-
-         if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
-             continue;
-         emit_store_chan(bld_base, inst, 0, chan_index, dst[chan_index]);
-      }
+   unsigned writemask = inst->Dst[index].Register.WriteMask;
+   while (writemask) {
+      unsigned chan_index = u_bit_scan(&writemask);
+      if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
+          continue;
+      emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
    }
 }
 
 static unsigned
 tgsi_to_pipe_tex_target(unsigned tgsi_target)
 {
    switch (tgsi_target) {
    case TGSI_TEXTURE_BUFFER:
       return PIPE_BUFFER;
    case TGSI_TEXTURE_1D:
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index c4e7f225a8f..1a1a70e23ea 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1132,39 +1132,40 @@ static LLVMValueRef fetch_input_tes(
 
        base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
        addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg);
 
        return buffer_load(bld_base, type, swizzle, buffer, base, addr, true);
 }
 
 static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
                             const struct tgsi_full_instruction *inst,
                             const struct tgsi_opcode_info *info,
+                            unsigned index,
                             LLVMValueRef dst[4])
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = &ctx->gallivm;
-       const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+       const struct tgsi_full_dst_register *reg = &inst->Dst[index];
        const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info;
        unsigned chan_index;
        LLVMValueRef dw_addr, stride;
        LLVMValueRef buffer, base, buf_addr;
        LLVMValueRef values[4];
        bool skip_lds_store;
        bool is_tess_factor = false, is_tess_inner = false;
 
        /* Only handle per-patch and per-vertex outputs here.
         * Vectors will be lowered to scalars and this function will be called again.
         */
        if (reg->Register.File != TGSI_FILE_OUTPUT ||
            (dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == 
LLVMVectorTypeKind)) {
-               si_llvm_emit_store(bld_base, inst, info, dst);
+               si_llvm_emit_store(bld_base, inst, info, index, dst);
                return;
        }
 
        if (reg->Register.Dimension) {
                stride = get_tcs_out_vertex_dw_stride(ctx);
                dw_addr = get_tcs_out_current_patch_offset(ctx);
                dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr);
                skip_lds_store = !sh_info->reads_pervertex_outputs;
        } else {
                dw_addr = get_tcs_out_current_patch_data_offset(ctx);
@@ -1184,54 +1185,55 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
                                is_tess_inner = name == TGSI_SEMANTIC_TESSINNER;
                        }
                }
        }
 
        buffer = desc_from_addr_base64k(ctx, 
ctx->param_tcs_offchip_addr_base64k);
 
        base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
        buf_addr = get_tcs_tes_buffer_address_from_reg(ctx, reg, NULL);
 
-
-       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
+       uint32_t writemask = reg->Register.WriteMask;
+       while (writemask) {
+               chan_index = u_bit_scan(&writemask);
                LLVMValueRef value = dst[chan_index];
 
                if (inst->Instruction.Saturate)
                        value = ac_build_clamp(&ctx->ac, value);
 
                /* Skip LDS stores if there is no LDS read of this output. */
                if (!skip_lds_store)
                        lds_store(bld_base, chan_index, dw_addr, value);
 
                value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
                values[chan_index] = value;
 
-               if (inst->Dst[0].Register.WriteMask != 0xF && !is_tess_factor) {
+               if (reg->Register.WriteMask != 0xF && !is_tess_factor) {
                        ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
                                                    buf_addr, base,
                                                    4 * chan_index, 1, 0, true, 
false);
                }
 
                /* Write tess factors into VGPRs for the epilog. */
                if (is_tess_factor &&
                    
ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) {
                        if (!is_tess_inner) {
                                LLVMBuildStore(gallivm->builder, value, /* 
outer */
                                               
ctx->invoc0_tess_factors[chan_index]);
                        } else if (chan_index < 2) {
                                LLVMBuildStore(gallivm->builder, value, /* 
inner */
                                               ctx->invoc0_tess_factors[4 + 
chan_index]);
                        }
                }
        }
 
-       if (inst->Dst[0].Register.WriteMask == 0xF && !is_tess_factor) {
+       if (reg->Register.WriteMask == 0xF && !is_tess_factor) {
                LLVMValueRef value = lp_build_gather_values(gallivm,
                                                            values, 4);
                ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, 
buf_addr,
                                            base, 0, 1, 0, true, false);
        }
 }
 
 static LLVMValueRef fetch_input_gs(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *reg,
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 023f9a6a093..141dd34be3d 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -282,20 +282,21 @@ LLVMValueRef si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
                                      LLVMValueRef ptr2);
 
 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
                                const struct tgsi_full_src_register *reg,
                                enum tgsi_opcode_type type,
                                unsigned swizzle);
 
 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
                        const struct tgsi_full_instruction *inst,
                        const struct tgsi_opcode_info *info,
+                       unsigned index,
                        LLVMValueRef dst[4]);
 
 /* Combine these with & instead of |. */
 #define NOOP_WAITCNT 0xf7f
 #define LGKM_CNT 0x07f
 #define VM_CNT 0xf70
 
 void si_emit_waitcnt(struct si_shader_context *ctx, unsigned simm16);
 
 LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx,
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 231f16f049d..67172729bb6 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -874,48 +874,52 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
                break;
 
        default:
                break;
        }
 }
 
 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
                        const struct tgsi_full_instruction *inst,
                        const struct tgsi_opcode_info *info,
+                       unsigned index,
                        LLVMValueRef dst[4])
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = &ctx->gallivm;
-       const struct tgsi_full_dst_register *reg = &inst->Dst[0];
+       const struct tgsi_full_dst_register *reg = &inst->Dst[index];
        LLVMBuilderRef builder = ctx->gallivm.builder;
        LLVMValueRef temp_ptr, temp_ptr2 = NULL;
-       unsigned chan, chan_index;
        bool is_vec_store = false;
        enum tgsi_opcode_type dtype = 
tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
 
        if (dst[0]) {
                LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
                is_vec_store = (k == LLVMVectorTypeKind);
        }
 
        if (is_vec_store) {
                LLVMValueRef values[4] = {};
-               TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
+               uint32_t writemask = reg->Register.WriteMask;
+               while (writemask) {
+                       unsigned chan = u_bit_scan(&writemask);
                        LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
                        values[chan]  = 
LLVMBuildExtractElement(gallivm->builder,
                                                        dst[0], index, "");
                }
-               bld_base->emit_store(bld_base, inst, info, values);
+               bld_base->emit_store(bld_base, inst, info, index, values);
                return;
        }
 
-       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+       uint32_t writemask = reg->Register.WriteMask;
+       while (writemask) {
+               unsigned chan_index = u_bit_scan(&writemask);
                LLVMValueRef value = dst[chan_index];
 
                if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index 
== 3))
                        continue;
                if (inst->Instruction.Saturate)
                        value = ac_build_clamp(&ctx->ac, value);
 
                if (reg->Register.File == TGSI_FILE_ADDRESS) {
                        temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
                        LLVMBuildStore(builder, value, temp_ptr);
-- 
2.11.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to