On Sat, Sep 29, 2012 at 05:19:22PM +0200, Vincent Lejeune wrote: > --- > src/gallium/drivers/r600/r600_llvm.c | 66 ++++++++++++++++++++++---- > src/gallium/drivers/r600/r600_shader.c | 79 > ++++++++++++++++++++++++++++++-- > src/gallium/drivers/radeon/radeon_llvm.h | 4 ++ > 3 files changed, 137 insertions(+), 12 deletions(-) > > diff --git a/src/gallium/drivers/r600/r600_llvm.c > b/src/gallium/drivers/r600/r600_llvm.c > index 71ea578..dddc867 100644 > --- a/src/gallium/drivers/r600/r600_llvm.c > +++ b/src/gallium/drivers/r600/r600_llvm.c > @@ -115,6 +115,8 @@ static void llvm_emit_epilogue(struct > lp_build_tgsi_context * bld_base) > struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); > struct lp_build_context * base = &bld_base->base; > unsigned i; > + > + unsigned color_count = 0; > > /* Add the necessary export instructions */ > for (i = 0; i < ctx->output_reg_count; i++) { > @@ -123,18 +125,66 @@ static void llvm_emit_epilogue(struct > lp_build_tgsi_context * bld_base) > LLVMValueRef output; > unsigned adjusted_reg_idx = i + > ctx->reserved_reg_count; > - LLVMValueRef reg_index = lp_build_const_int32( > - base->gallivm, > - radeon_llvm_reg_index_soa(adjusted_reg_idx, > chan)); > > output = LLVMBuildLoad(base->gallivm->builder, > ctx->soa.outputs[i][chan], ""); > > - lp_build_intrinsic_binary( > - base->gallivm->builder, > - "llvm.AMDGPU.store.output", > - LLVMVoidTypeInContext(base->gallivm->context), > - output, reg_index); > + if (ctx->type == TGSI_PROCESSOR_VERTEX) { > + LLVMValueRef reg_index = lp_build_const_int32( > + base->gallivm, > + > radeon_llvm_reg_index_soa(adjusted_reg_idx, chan)); > + lp_build_intrinsic_binary( > + base->gallivm->builder, > + "llvm.AMDGPU.store.output", > + > LLVMVoidTypeInContext(base->gallivm->context), > + output, reg_index); > + } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { > + switch (ctx->r600_outputs[i].name) { > + case TGSI_SEMANTIC_COLOR: > + if ( color_count/4 < > ctx->color_buffer_count + 
ctx->extra_buffer) { > + if (ctx->fs_color_all) { > + for (unsigned j = 0; j > < ctx->color_buffer_count; j++) { > + LLVMValueRef > reg_index = lp_build_const_int32( > + > base->gallivm, > + (j * 4) > + chan); > + > lp_build_intrinsic_binary( > + > base->gallivm->builder, > + > "llvm.R600.store.pixel.color", > + > LLVMVoidTypeInContext(base->gallivm->context), > + output, > reg_index); > + } > + } else { > + LLVMValueRef reg_index > = lp_build_const_int32( > + base->gallivm, > + > (color_count++/4) * 4 + chan); > + > lp_build_intrinsic_binary( > + > base->gallivm->builder, > + > "llvm.R600.store.pixel.color", > + > LLVMVoidTypeInContext(base->gallivm->context), > + output, > reg_index); > + } > + } > + break; > + case TGSI_SEMANTIC_POSITION: > + if (chan != 2) > + continue; > + lp_build_intrinsic_unary( > + base->gallivm->builder, > + "llvm.R600.store.pixel.depth", > + > LLVMVoidTypeInContext(base->gallivm->context), > + output); > + break; > + case TGSI_SEMANTIC_STENCIL: > + if (chan != 1) > + continue; > + lp_build_intrinsic_unary( > + base->gallivm->builder, > + "llvm.R600.store.pixel.stencil", > + > LLVMVoidTypeInContext(base->gallivm->context), > + output); > + break; > + } > + } > } > } > } > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index bf4877a..a97590a 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -522,6 +522,38 @@ static int r600_vtx_from_byte_stream(struct > r600_shader_ctx *ctx, > return bytes_read; > } > > +static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx, > + unsigned char * bytes, unsigned bytes_read) > +{ > +struct r600_bytecode_output output; > + memset(&output, 0, sizeof(struct r600_bytecode_output)); > + > + output.gpr = bytes[bytes_read++]; > + output.elem_size = 3; > + output.burst_count = 1; > + output.barrier = 1; > + output.type = -1; > + output.end_of_program = 1; > + if (bytes[bytes_read++] == 0x27) { > + 
output.inst = > + BC_INST(ctx->bc, > V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); > + output.end_of_program = 0; > + } else { > + output.inst = > + BC_INST(ctx->bc, > V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); > + output.end_of_program = 1; > + } > + output.array_base = bytes[bytes_read++]; > + output.swizzle_x = bytes[bytes_read++]; > + output.swizzle_y = bytes[bytes_read++]; > + output.swizzle_z = bytes[bytes_read++]; > + output.swizzle_w = bytes[bytes_read++]; > + output.type = 0; > + > + r600_bytecode_add_output(ctx->bc, &output); > + return bytes_read; > +} > +
As I mentioned in the previous patch, we should have LLVM emit the real encoding, and then extract the fields to build the struct r600_bytecode object, just like we do for VTX instructions.

> static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, > unsigned char * bytes, unsigned num_bytes) > { > @@ -556,6 +588,10 @@ static void r600_bytecode_from_byte_stream(struct > r600_shader_ctx *ctx, > bytes_read = r600_vtx_from_byte_stream(ctx, bytes, > bytes_read); > break; > + case 5: > + bytes_read = r600_export_from_byte_stream(ctx, bytes, > + bytes_read); > + break; > default: > /* XXX: Error here */ > break; > @@ -1336,7 +1372,11 @@ static int r600_shader_from_tgsi(struct r600_screen > *rscreen, > radeon_llvm_ctx.two_side = shader->two_side; > radeon_llvm_ctx.face_input = ctx.face_gpr; > radeon_llvm_ctx.r600_inputs = ctx.shader->input; > + radeon_llvm_ctx.r600_outputs = ctx.shader->output; > + radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1); > radeon_llvm_ctx.chip_class = ctx.bc->chip_class; > + radeon_llvm_ctx.fs_color_all = shader->fs_write_all && > (rscreen->chip_class >= EVERGREEN); > + radeon_llvm_ctx.extra_buffer = key.dual_src_blend; > mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); > if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) { > dump = 1; > @@ -1598,6 +1638,33 @@ static int r600_shader_from_tgsi(struct r600_screen > *rscreen, > } > break; > case TGSI_PROCESSOR_FRAGMENT: > + if (use_llvm) { > + if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { > + /* never export more colors than the number of > CBs */ > + if (next_pixel_base && next_pixel_base >= > key.nr_cbufs + key.dual_src_blend) { > + /* skip export */ > + j--; > + continue; > + } > + output[j].swizzle_w = key.alpha_to_one ? 
5 : 3; > + next_pixel_base++; > + shader->nr_ps_color_exports++; > + if (shader->fs_write_all && > (rscreen->chip_class >= EVERGREEN)) { > + for (k = 1; k < key.nr_cbufs; k++) { > + shader->nr_ps_color_exports++; > + next_pixel_base++; > + } > + } > + } else if (shader->output[i].name == > TGSI_SEMANTIC_POSITION) { > + continue; > + } else if (shader->output[i].name == > TGSI_SEMANTIC_STENCIL) { > + continue; > + } else { > + R600_ERR("unsupported fragment output name > %d\n", shader->output[i].name); > + r = -EINVAL; > + goto out_err; > + } > + } else { > if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { > /* never export more colors than the number of > CBs */ > if (next_pixel_base && next_pixel_base >= > key.nr_cbufs + key.dual_src_blend) {

Is there a cleaner way to separate the LLVM and non-LLVM functionality? There is a lot of duplicated code here.

> @@ -1644,6 +1711,7 @@ static int r600_shader_from_tgsi(struct r600_screen > *rscreen, > r = -EINVAL; > goto out_err; > } > + } > break; > default: > R600_ERR("unsupported processor type %d\n", ctx.type); > @@ -1706,10 +1774,13 @@ static int r600_shader_from_tgsi(struct r600_screen > *rscreen, > } > } > /* add output to bytecode */ > - for (i = 0; i < noutput; i++) { > - r = r600_bytecode_add_output(ctx.bc, &output[i]); > - if (r) > - goto out_err; > + if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT || > + (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0)) { > + for (i = 0; i < noutput; i++) { > + r = r600_bytecode_add_output(ctx.bc, &output[i]); > + if (r) > + goto out_err; > + } > } > /* add program end */ > if (ctx.bc->chip_class == CAYMAN) > diff --git a/src/gallium/drivers/radeon/radeon_llvm.h > b/src/gallium/drivers/radeon/radeon_llvm.h > index 6118b11..948e8cf 100644 > --- a/src/gallium/drivers/radeon/radeon_llvm.h > +++ b/src/gallium/drivers/radeon/radeon_llvm.h > @@ -59,6 +59,10 @@ struct radeon_llvm_context { > unsigned face_input; > unsigned two_side; > struct r600_shader_io * 
r600_inputs; > + struct r600_shader_io * r600_outputs; > + unsigned color_buffer_count; > + unsigned extra_buffer; > + unsigned fs_color_all; > > /*=== Front end configuration ===*/ > > -- > 1.7.11.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev