---
 src/gallium/drivers/r600/eg_asm.c        | 17 +++++++
 src/gallium/drivers/r600/r600_asm.c      | 17 +++++++
 src/gallium/drivers/r600/r600_asm.h      |  2 +
 src/gallium/drivers/r600/r600_llvm.c     | 66 ++++++++++++++++++++++---
 src/gallium/drivers/r600/r600_shader.c   | 83 +++++++++++++++++++++++---------
 src/gallium/drivers/radeon/radeon_llvm.h |  3 ++
 6 files changed, 157 insertions(+), 31 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c 
b/src/gallium/drivers/r600/eg_asm.c
index 00ac4a8..69617d9 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -144,3 +144,20 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct 
r600_bytecode_cf *cf)
        }
        return 0;
 }
+
+/*
+ * Unpacks the two 32-bit words of an export instruction into the
+ * corresponding fields of *output.
+ *
+ * word0 supplies array_base, type, gpr and elem_size; word1 supplies the
+ * four swizzles, burst_count, end_of_program, inst and barrier.  Any other
+ * field of *output is left untouched.
+ */
+void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t 
word0, uint32_t word1)
+{
+       output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0);
+       output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
+       output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
+       output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);
+
+       output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
+       output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
+       output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
+       output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
+       output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
+       output->end_of_program = 
G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
+       /*
+        * The instruction field extracted from word1 is passed back through
+        * the EG_S_ macro before being stored in output->inst.
+        * NOTE(review): the extracting macro carries no EG_ prefix while the
+        * wrapping one does -- confirm both describe the same bit layout.
+        */
+       output->inst = 
EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1));
+       output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
+}
diff --git a/src/gallium/drivers/r600/r600_asm.c 
b/src/gallium/drivers/r600/r600_asm.c
index 37e8bf0..7d4aa69 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2938,3 +2938,20 @@ void r600_bytecode_alu_read(struct r600_bytecode_alu 
*alu, uint32_t word0, uint3
                        G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1);
        }
 }
+
+/*
+ * Unpacks the two 32-bit words of an export instruction into the
+ * corresponding fields of *output.
+ *
+ * word0 supplies array_base, type, gpr and elem_size; word1 supplies the
+ * four swizzles, burst_count, end_of_program, inst and barrier.  Any other
+ * field of *output is left untouched.
+ *
+ * NOTE(review): this body is identical, statement for statement, to
+ * eg_bytecode_export_read() in eg_asm.c -- confirm that a separate
+ * non-Evergreen copy is intended.
+ */
+void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t 
word0, uint32_t word1)
+{
+       output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0);
+       output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
+       output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
+       output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);
+
+       output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
+       output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
+       output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
+       output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
+       output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
+       output->end_of_program = 
G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
+       /*
+        * NOTE(review): the instruction field is wrapped in the EG_S_ macro
+        * even though this is the r600_ variant, which the byte-stream
+        * reader selects when chip_class is below EVERGREEN.  This looks
+        * like a copy-paste from the eg_ function -- confirm which macro
+        * (if any) should wrap the value on these chips.
+        */
+       output->inst = 
EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1));
+       output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
+}
diff --git a/src/gallium/drivers/r600/r600_asm.h 
b/src/gallium/drivers/r600/r600_asm.h
index d8f258f..8009d97 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -244,5 +244,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context 
*ctx,
 void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct 
r600_bytecode_cf *cf);
 int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu 
*alu, unsigned id);
 void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, 
uint32_t word1);
+/*
+ * Decoders that fill *output from the two 32-bit words of an export
+ * instruction; defined in r600_asm.c and eg_asm.c respectively.
+ */
+void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t 
word0, uint32_t word1);
+void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t 
word0, uint32_t word1);
 
 #endif
diff --git a/src/gallium/drivers/r600/r600_llvm.c 
b/src/gallium/drivers/r600/r600_llvm.c
index c6e60af..85289d5 100644
--- a/src/gallium/drivers/r600/r600_llvm.c
+++ b/src/gallium/drivers/r600/r600_llvm.c
@@ -151,6 +151,8 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context 
* bld_base)
        struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
        struct lp_build_context * base = &bld_base->base;
        unsigned i;
+       
+       unsigned color_count = 0;
 
        /* Add the necessary export instructions */
        for (i = 0; i < ctx->output_reg_count; i++) {
@@ -159,18 +161,66 @@ static void llvm_emit_epilogue(struct 
lp_build_tgsi_context * bld_base)
                        LLVMValueRef output;
                        unsigned adjusted_reg_idx = i +
                                        ctx->reserved_reg_count;
-                       LLVMValueRef reg_index = lp_build_const_int32(
-                               base->gallivm,
-                               radeon_llvm_reg_index_soa(adjusted_reg_idx, 
chan));
 
                        output = LLVMBuildLoad(base->gallivm->builder,
                                ctx->soa.outputs[i][chan], "");
 
-                       lp_build_intrinsic_binary(
-                               base->gallivm->builder,
-                               "llvm.AMDGPU.store.output",
-                               LLVMVoidTypeInContext(base->gallivm->context),
-                               output, reg_index);
+                       if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+                               LLVMValueRef reg_index = lp_build_const_int32(
+                                       base->gallivm,
+                                       
radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
+                               lp_build_intrinsic_binary(
+                                       base->gallivm->builder,
+                                       "llvm.AMDGPU.store.output",
+                                       
LLVMVoidTypeInContext(base->gallivm->context),
+                                       output, reg_index);
+                       } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+                               switch (ctx->r600_outputs[i].name) {
+                               case TGSI_SEMANTIC_COLOR:
+                                       if ( color_count/4 < 
ctx->color_buffer_count) {
+                                               if (ctx->fs_color_all) {
+                                                       for (unsigned j = 0; j 
< ctx->color_buffer_count; j++) {
+                                                               LLVMValueRef 
reg_index = lp_build_const_int32(
+                                                                       
base->gallivm,
+                                                                       (j * 4) 
+ chan);
+                                                               
lp_build_intrinsic_binary(
+                                                                       
base->gallivm->builder,
+                                                                       
"llvm.R600.store.pixel.color",
+                                                                       
LLVMVoidTypeInContext(base->gallivm->context),
+                                                                       output, 
reg_index);
+                                                       }
+                                               } else {
+                                                       LLVMValueRef reg_index 
= lp_build_const_int32(
+                                                               base->gallivm,
+                                                               
(color_count++/4) * 4 + chan);
+                                                       
lp_build_intrinsic_binary(
+                                                               
base->gallivm->builder,
+                                                               
"llvm.R600.store.pixel.color",
+                                                               
LLVMVoidTypeInContext(base->gallivm->context),
+                                                               output, 
reg_index);
+                                               }
+                                       }
+                                       break;
+                               case TGSI_SEMANTIC_POSITION:
+                                       if (chan != 2)
+                                               continue;
+                                       lp_build_intrinsic_unary(
+                                               base->gallivm->builder,
+                                               "llvm.R600.store.pixel.depth",
+                                               
LLVMVoidTypeInContext(base->gallivm->context),
+                                               output);
+                                       break;
+                               case TGSI_SEMANTIC_STENCIL:
+                                       if (chan != 1)
+                                               continue;
+                                       lp_build_intrinsic_unary(
+                                               base->gallivm->builder,
+                                               "llvm.R600.store.pixel.stencil",
+                                               
LLVMVoidTypeInContext(base->gallivm->context),
+                                               output);
+                                       break;
+                               }
+                       }
                }
        }
 }
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 0b45d4f..81745af 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -522,6 +522,21 @@ static int r600_vtx_from_byte_stream(struct 
r600_shader_ctx *ctx,
        return bytes_read;
 }
 
+/*
+ * Reads one export instruction (two 32-bit words) from the byte stream,
+ * decodes it and adds it as an output to the bytecode in ctx->bc.
+ *
+ * bytes      - the byte stream being parsed
+ * bytes_read - offset handed by address to i32_from_byte_stream() for each
+ *              word (presumably advanced past the word by that helper --
+ *              TODO confirm)
+ *
+ * Returns bytes_read as it stands after both words have been read.
+ */
+static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx,
+       unsigned char * bytes, unsigned bytes_read)
+{
+       struct r600_bytecode_output output;
+       /* Start from a zeroed output so any field the decoder does not set
+        * stays 0. */
+       memset(&output, 0, sizeof(struct r600_bytecode_output));
+       uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read);
+       uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
+       /* Pick the decoder by chip generation. */
+       if (ctx->bc->chip_class >= EVERGREEN)
+               eg_bytecode_export_read(&output, word0,word1);
+       else
+               r600_bytecode_export_read(&output, word0,word1);
+       /* NOTE(review): the return value of r600_bytecode_add_output() is
+        * discarded here, whereas the output loop in r600_shader_from_tgsi()
+        * treats a non-zero result as an error -- confirm that ignoring a
+        * failure is acceptable on this path. */
+       r600_bytecode_add_output(ctx->bc, &output);
+       return bytes_read;
+}
+
 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
                                unsigned char * bytes,  unsigned num_bytes)
 {
@@ -556,6 +571,10 @@ static void r600_bytecode_from_byte_stream(struct 
r600_shader_ctx *ctx,
                        bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
                                                                bytes_read);
                        break;
+               case 5:
+            bytes_read = r600_export_from_byte_stream(ctx, bytes,
+                                bytes_read);
+            break;
                default:
                        /* XXX: Error here */
                        break;
@@ -1348,7 +1367,10 @@ static int r600_shader_from_tgsi(struct r600_screen 
*rscreen,
                radeon_llvm_ctx.two_side = shader->two_side;
                radeon_llvm_ctx.face_input = ctx.face_gpr;
                radeon_llvm_ctx.r600_inputs = ctx.shader->input;
+               radeon_llvm_ctx.r600_outputs = ctx.shader->output;
+               radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1);
                radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
+               radeon_llvm_ctx.fs_color_all = shader->fs_write_all && 
(rscreen->chip_class >= EVERGREEN);
                mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
                if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
                        dump = 1;
@@ -1617,28 +1639,40 @@ static int r600_shader_from_tgsi(struct r600_screen 
*rscreen,
                                        j--;
                                        continue;
                                }
-                               output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
-                               output[j].array_base = next_pixel_base++;
-                               output[j].type = 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                               next_pixel_base++;
                                shader->nr_ps_color_exports++;
-                               if (shader->fs_write_all && 
(rscreen->chip_class >= EVERGREEN)) {
-                                       for (k = 1; k < key.nr_cbufs; k++) {
-                                               j++;
-                                               memset(&output[j], 0, 
sizeof(struct r600_bytecode_output));
-                                               output[j].gpr = 
shader->output[i].gpr;
-                                               output[j].elem_size = 3;
-                                               output[j].swizzle_x = 0;
-                                               output[j].swizzle_y = 1;
-                                               output[j].swizzle_z = 2;
-                                               output[j].swizzle_w = 
key.alpha_to_one ? 5 : 3;
-                                               output[j].burst_count = 1;
-                                               output[j].barrier = 1;
-                                               output[j].array_base = 
next_pixel_base++;
-                                               output[j].inst = 
BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
-                                               output[j].type = 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
-                                               shader->nr_ps_color_exports++;
+                               if (use_llvm) {
+                                       if (shader->fs_write_all && 
(rscreen->chip_class >= EVERGREEN) && key.nr_cbufs) {
+                                               shader->nr_ps_color_exports += 
key.nr_cbufs - 1;
+                                               next_pixel_base += key.nr_cbufs 
- 1;
+                                       } else {
+                                               continue;
                                        }
+                               } else {
+                                       output[j].swizzle_w = key.alpha_to_one 
? 5 : 3;
+                                       output[j].array_base = next_pixel_base 
- 1;
+                                       output[j].type = 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                                       if (shader->fs_write_all && 
(rscreen->chip_class >= EVERGREEN)) {
+                                               for (k = 1; k < key.nr_cbufs; 
k++) {
+                                                       j++;
+                                                       memset(&output[j], 0, 
sizeof(struct r600_bytecode_output));
+                                                       output[j].gpr = 
shader->output[i].gpr;
+                                                       output[j].elem_size = 3;
+                                                       output[j].swizzle_x = 0;
+                                                       output[j].swizzle_y = 1;
+                                                       output[j].swizzle_z = 2;
+                                                       output[j].swizzle_w = 
key.alpha_to_one ? 5 : 3;
+                                                       output[j].burst_count = 
1;
+                                                       output[j].barrier = 1;
+                                                       output[j].array_base = 
next_pixel_base++;
+                                                       output[j].inst = 
BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
+                                                       output[j].type = 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                                                       
shader->nr_ps_color_exports++;
+                                               }
+                                        }
                                }
+                       } else if (use_llvm) {
+                               continue;
                        } else if (shader->output[i].name == 
TGSI_SEMANTIC_POSITION) {
                                output[j].array_base = 61;
                                output[j].swizzle_x = 2;
@@ -1718,10 +1752,13 @@ static int r600_shader_from_tgsi(struct r600_screen 
*rscreen,
                }
        }
        /* add output to bytecode */
-       for (i = 0; i < noutput; i++) {
-               r = r600_bytecode_add_output(ctx.bc, &output[i]);
-               if (r)
-                       goto out_err;
+       if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT ||
+               (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0)) {
+               for (i = 0; i < noutput; i++) {
+                       r = r600_bytecode_add_output(ctx.bc, &output[i]);
+                       if (r)
+                               goto out_err;
+               }
        }
        /* add program end */
        if (ctx.bc->chip_class == CAYMAN)
diff --git a/src/gallium/drivers/radeon/radeon_llvm.h 
b/src/gallium/drivers/radeon/radeon_llvm.h
index 6118b11..61975c4 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -59,6 +59,9 @@ struct radeon_llvm_context {
        unsigned face_input;
        unsigned two_side;
        struct r600_shader_io * r600_inputs;
+       struct r600_shader_io * r600_outputs;
+       unsigned color_buffer_count;
+       unsigned fs_color_all;
 
        /*=== Front end configuration ===*/
 
-- 
1.7.11.7

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to