On Wed, Oct 10, 2012 at 08:20:57PM +0200, Vincent Lejeune wrote:
> ---
> src/gallium/drivers/r600/eg_asm.c | 17 +++++++
> src/gallium/drivers/r600/r600_asm.c | 17 +++++++
> src/gallium/drivers/r600/r600_asm.h | 2 +
> src/gallium/drivers/r600/r600_llvm.c | 66 ++++++++++++++++++++++---
> src/gallium/drivers/r600/r600_shader.c | 83 +++++++++++++++++++++++--------
> src/gallium/drivers/radeon/radeon_llvm.h | 3 ++
> 6 files changed, 157 insertions(+), 31 deletions(-)
>
One small coding style issue below, with that fix: Reviewed-by: Tom Stellard <thomas.stell...@amd.com> > diff --git a/src/gallium/drivers/r600/eg_asm.c > b/src/gallium/drivers/r600/eg_asm.c > index 00ac4a8..69617d9 100644 > --- a/src/gallium/drivers/r600/eg_asm.c > +++ b/src/gallium/drivers/r600/eg_asm.c > @@ -144,3 +144,20 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, > struct r600_bytecode_cf *cf) > } > return 0; > } > + > +void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t > word0, uint32_t word1) > +{ > + output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0); > + output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0); > + output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0); > + output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0); > + > + output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1); > + output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1); > + output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1); > + output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1); > + output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1); > + output->end_of_program = > G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1); > + output->inst = > EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1)); > + output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1); > +} > diff --git a/src/gallium/drivers/r600/r600_asm.c > b/src/gallium/drivers/r600/r600_asm.c > index 37e8bf0..7d4aa69 100644 > --- a/src/gallium/drivers/r600/r600_asm.c > +++ b/src/gallium/drivers/r600/r600_asm.c > @@ -2938,3 +2938,20 @@ void r600_bytecode_alu_read(struct r600_bytecode_alu > *alu, uint32_t word0, uint3 > G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1); > } > } > + > +void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t > word0, uint32_t word1) > +{ > + output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0); > + output->type = 
G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0); > + output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0); > + output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0); > + > + output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1); > + output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1); > + output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1); > + output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1); > + output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1); > + output->end_of_program = > G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1); > + output->inst = > EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1)); > + output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1); > +} > diff --git a/src/gallium/drivers/r600/r600_asm.h > b/src/gallium/drivers/r600/r600_asm.h > index d8f258f..8009d97 100644 > --- a/src/gallium/drivers/r600/r600_asm.h > +++ b/src/gallium/drivers/r600/r600_asm.h > @@ -244,5 +244,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context > *ctx, > void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct > r600_bytecode_cf *cf); > int r700_bytecode_alu_build(struct r600_bytecode *bc, struct > r600_bytecode_alu *alu, unsigned id); > void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, > uint32_t word1); > +void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t > word0, uint32_t word1); > +void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t > word0, uint32_t word1); > > #endif > diff --git a/src/gallium/drivers/r600/r600_llvm.c > b/src/gallium/drivers/r600/r600_llvm.c > index c6e60af..85289d5 100644 > --- a/src/gallium/drivers/r600/r600_llvm.c > +++ b/src/gallium/drivers/r600/r600_llvm.c > @@ -151,6 +151,8 @@ static void llvm_emit_epilogue(struct > lp_build_tgsi_context * bld_base) > struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); > struct lp_build_context * 
base = &bld_base->base; > unsigned i; > + > + unsigned color_count = 0; > > /* Add the necessary export instructions */ > for (i = 0; i < ctx->output_reg_count; i++) { > @@ -159,18 +161,66 @@ static void llvm_emit_epilogue(struct > lp_build_tgsi_context * bld_base) > LLVMValueRef output; > unsigned adjusted_reg_idx = i + > ctx->reserved_reg_count; > - LLVMValueRef reg_index = lp_build_const_int32( > - base->gallivm, > - radeon_llvm_reg_index_soa(adjusted_reg_idx, > chan)); > > output = LLVMBuildLoad(base->gallivm->builder, > ctx->soa.outputs[i][chan], ""); > > - lp_build_intrinsic_binary( > - base->gallivm->builder, > - "llvm.AMDGPU.store.output", > - LLVMVoidTypeInContext(base->gallivm->context), > - output, reg_index); > + if (ctx->type == TGSI_PROCESSOR_VERTEX) { > + LLVMValueRef reg_index = lp_build_const_int32( > + base->gallivm, > + > radeon_llvm_reg_index_soa(adjusted_reg_idx, chan)); > + lp_build_intrinsic_binary( > + base->gallivm->builder, > + "llvm.AMDGPU.store.output", > + > LLVMVoidTypeInContext(base->gallivm->context), > + output, reg_index); > + } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { > + switch (ctx->r600_outputs[i].name) { > + case TGSI_SEMANTIC_COLOR: > + if ( color_count/4 < > ctx->color_buffer_count) { > + if (ctx->fs_color_all) { > + for (unsigned j = 0; j > < ctx->color_buffer_count; j++) { > + LLVMValueRef > reg_index = lp_build_const_int32( > + > base->gallivm, > + (j * 4) > + chan); > + > lp_build_intrinsic_binary( > + > base->gallivm->builder, > + > "llvm.R600.store.pixel.color", > + > LLVMVoidTypeInContext(base->gallivm->context), > + output, > reg_index); > + } > + } else { > + LLVMValueRef reg_index > = lp_build_const_int32( > + base->gallivm, > + > (color_count++/4) * 4 + chan); > + > lp_build_intrinsic_binary( > + > base->gallivm->builder, > + > "llvm.R600.store.pixel.color", > + > LLVMVoidTypeInContext(base->gallivm->context), > + output, > reg_index); > + } > + } > + break; > + case TGSI_SEMANTIC_POSITION: > + if (chan != 
2) > + continue; > + lp_build_intrinsic_unary( > + base->gallivm->builder, > + "llvm.R600.store.pixel.depth", > + > LLVMVoidTypeInContext(base->gallivm->context), > + output); > + break; > + case TGSI_SEMANTIC_STENCIL: > + if (chan != 1) > + continue; > + lp_build_intrinsic_unary( > + base->gallivm->builder, > + "llvm.R600.store.pixel.stencil", > + > LLVMVoidTypeInContext(base->gallivm->context), > + output); > + break; > + } > + } > } > } > } > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index 0b45d4f..81745af 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -522,6 +522,21 @@ static int r600_vtx_from_byte_stream(struct > r600_shader_ctx *ctx, > return bytes_read; > } > > +static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx, > + unsigned char * bytes, unsigned bytes_read) > +{ > + struct r600_bytecode_output output; > + memset(&output, 0, sizeof(struct r600_bytecode_output)); > + uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read); > + uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read); > + if (ctx->bc->chip_class >= EVERGREEN) > + eg_bytecode_export_read(&output, word0,word1); > + else > + r600_bytecode_export_read(&output, word0,word1); > + r600_bytecode_add_output(ctx->bc, &output); > + return bytes_read; > +} > + > static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, > unsigned char * bytes, unsigned num_bytes) > { > @@ -556,6 +571,10 @@ static void r600_bytecode_from_byte_stream(struct > r600_shader_ctx *ctx, > bytes_read = r600_vtx_from_byte_stream(ctx, bytes, > bytes_read); > break; > + case 5: > + bytes_read = r600_export_from_byte_stream(ctx, bytes, > + bytes_read); > + break; > default: > /* XXX: Error here */ > break; > @@ -1348,7 +1367,10 @@ static int r600_shader_from_tgsi(struct r600_screen > *rscreen, > radeon_llvm_ctx.two_side = shader->two_side; > radeon_llvm_ctx.face_input = ctx.face_gpr; > 
radeon_llvm_ctx.r600_inputs = ctx.shader->input; > + radeon_llvm_ctx.r600_outputs = ctx.shader->output; > + radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1); > radeon_llvm_ctx.chip_class = ctx.bc->chip_class; > + radeon_llvm_ctx.fs_color_all = shader->fs_write_all && > (rscreen->chip_class >= EVERGREEN); > mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); > if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) { > dump = 1; > @@ -1617,28 +1639,40 @@ static int r600_shader_from_tgsi(struct r600_screen > *rscreen, > j--; > continue; > } > - output[j].swizzle_w = key.alpha_to_one ? 5 : 3; > - output[j].array_base = next_pixel_base++; > - output[j].type = > V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; > + next_pixel_base++; > shader->nr_ps_color_exports++; > - if (shader->fs_write_all && > (rscreen->chip_class >= EVERGREEN)) { > - for (k = 1; k < key.nr_cbufs; k++) { > - j++; > - memset(&output[j], 0, > sizeof(struct r600_bytecode_output)); > - output[j].gpr = > shader->output[i].gpr; > - output[j].elem_size = 3; > - output[j].swizzle_x = 0; > - output[j].swizzle_y = 1; > - output[j].swizzle_z = 2; > - output[j].swizzle_w = > key.alpha_to_one ? 5 : 3; > - output[j].burst_count = 1; > - output[j].barrier = 1; > - output[j].array_base = > next_pixel_base++; > - output[j].inst = > BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); > - output[j].type = > V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; > - shader->nr_ps_color_exports++; > + if (use_llvm) { > + if (shader->fs_write_all && > (rscreen->chip_class >= EVERGREEN) && key.nr_cbufs) { > + shader->nr_ps_color_exports += > key.nr_cbufs - 1; > + next_pixel_base += key.nr_cbufs > - 1; > + } else { > + continue; > } > + } else { > + output[j].swizzle_w = key.alpha_to_one > ? 
5 : 3; > + output[j].array_base = next_pixel_base > - 1; > + output[j].type = > V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; > + if (shader->fs_write_all && > (rscreen->chip_class >= EVERGREEN)) { > + for (k = 1; k < key.nr_cbufs; > k++) { > + j++; > + memset(&output[j], 0, > sizeof(struct r600_bytecode_output)); > + output[j].gpr = > shader->output[i].gpr; > + output[j].elem_size = 3; > + output[j].swizzle_x = 0; > + output[j].swizzle_y = 1; > + output[j].swizzle_z = 2; > + output[j].swizzle_w = > key.alpha_to_one ? 5 : 3; > + output[j].burst_count = > 1; > + output[j].barrier = 1; > + output[j].array_base = > next_pixel_base++; > + output[j].inst = > BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); > + output[j].type = > V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; > + > shader->nr_ps_color_exports++; > + } > + } > } > + } else if (use_llvm) { > + continue; > } else if (shader->output[i].name == > TGSI_SEMANTIC_POSITION) { > output[j].array_base = 61; > output[j].swizzle_x = 2; > @@ -1718,10 +1752,13 @@ static int r600_shader_from_tgsi(struct r600_screen > *rscreen, > } > } > /* add output to bytecode */ > - for (i = 0; i < noutput; i++) { > - r = r600_bytecode_add_output(ctx.bc, &output[i]); > - if (r) > - goto out_err; > + if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT || > + (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0)) { > + for (i = 0; i < noutput; i++) { > + r = r600_bytecode_add_output(ctx.bc, &output[i]); > + if (r) > + goto out_err; > + } The indentation looks off here. 
> }
> /* add program end */
> if (ctx.bc->chip_class == CAYMAN)
> diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
> index 6118b11..61975c4 100644
> --- a/src/gallium/drivers/radeon/radeon_llvm.h
> +++ b/src/gallium/drivers/radeon/radeon_llvm.h
> @@ -59,6 +59,9 @@ struct radeon_llvm_context {
> unsigned face_input;
> unsigned two_side;
> struct r600_shader_io * r600_inputs;
> + struct r600_shader_io * r600_outputs;
> + unsigned color_buffer_count;
> + unsigned fs_color_all;
>
> /*=== Front end configuration ===*/
>
> --
> 1.7.11.7
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev