On Sat, Sep 29, 2012 at 05:19:22PM +0200, Vincent Lejeune wrote:
> ---
>  src/gallium/drivers/r600/r600_llvm.c     | 66 ++++++++++++++++++++++----
>  src/gallium/drivers/r600/r600_shader.c   | 79 
> ++++++++++++++++++++++++++++++--
>  src/gallium/drivers/radeon/radeon_llvm.h |  4 ++
>  3 files changed, 137 insertions(+), 12 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_llvm.c 
> b/src/gallium/drivers/r600/r600_llvm.c
> index 71ea578..dddc867 100644
> --- a/src/gallium/drivers/r600/r600_llvm.c
> +++ b/src/gallium/drivers/r600/r600_llvm.c
> @@ -115,6 +115,8 @@ static void llvm_emit_epilogue(struct 
> lp_build_tgsi_context * bld_base)
>       struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
>       struct lp_build_context * base = &bld_base->base;
>       unsigned i;
> +     
> +     unsigned color_count = 0;
>  
>       /* Add the necessary export instructions */
>       for (i = 0; i < ctx->output_reg_count; i++) {
> @@ -123,18 +125,66 @@ static void llvm_emit_epilogue(struct 
> lp_build_tgsi_context * bld_base)
>                       LLVMValueRef output;
>                       unsigned adjusted_reg_idx = i +
>                                       ctx->reserved_reg_count;
> -                     LLVMValueRef reg_index = lp_build_const_int32(
> -                             base->gallivm,
> -                             radeon_llvm_reg_index_soa(adjusted_reg_idx, 
> chan));
>  
>                       output = LLVMBuildLoad(base->gallivm->builder,
>                               ctx->soa.outputs[i][chan], "");
>  
> -                     lp_build_intrinsic_binary(
> -                             base->gallivm->builder,
> -                             "llvm.AMDGPU.store.output",
> -                             LLVMVoidTypeInContext(base->gallivm->context),
> -                             output, reg_index);
> +                     if (ctx->type == TGSI_PROCESSOR_VERTEX) {
> +                             LLVMValueRef reg_index = lp_build_const_int32(
> +                                     base->gallivm,
> +                                     
> radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
> +                             lp_build_intrinsic_binary(
> +                                     base->gallivm->builder,
> +                                     "llvm.AMDGPU.store.output",
> +                                     
> LLVMVoidTypeInContext(base->gallivm->context),
> +                                     output, reg_index);
> +                     } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
> +                             switch (ctx->r600_outputs[i].name) {
> +                             case TGSI_SEMANTIC_COLOR:
> +                                     if ( color_count/4 < 
> ctx->color_buffer_count + ctx->extra_buffer) {
> +                                             if (ctx->fs_color_all) {
> +                                                     for (unsigned j = 0; j 
> < ctx->color_buffer_count; j++) {
> +                                                             LLVMValueRef 
> reg_index = lp_build_const_int32(
> +                                                                     
> base->gallivm,
> +                                                                     (j * 4) 
> + chan);
> +                                                             
> lp_build_intrinsic_binary(
> +                                                                     
> base->gallivm->builder,
> +                                                                     
> "llvm.R600.store.pixel.color",
> +                                                                     
> LLVMVoidTypeInContext(base->gallivm->context),
> +                                                                     output, 
> reg_index);
> +                                                     }
> +                                             } else {
> +                                                     LLVMValueRef reg_index 
> = lp_build_const_int32(
> +                                                             base->gallivm,
> +                                                             
> (color_count++/4) * 4 + chan);
> +                                                     
> lp_build_intrinsic_binary(
> +                                                             
> base->gallivm->builder,
> +                                                             
> "llvm.R600.store.pixel.color",
> +                                                             
> LLVMVoidTypeInContext(base->gallivm->context),
> +                                                             output, 
> reg_index);
> +                                             }
> +                                     }
> +                                     break;
> +                             case TGSI_SEMANTIC_POSITION:
> +                                     if (chan != 2)
> +                                             continue;
> +                                     lp_build_intrinsic_unary(
> +                                             base->gallivm->builder,
> +                                             "llvm.R600.store.pixel.depth",
> +                                             
> LLVMVoidTypeInContext(base->gallivm->context),
> +                                             output);
> +                                     break;
> +                             case TGSI_SEMANTIC_STENCIL:
> +                                     if (chan != 1)
> +                                             continue;
> +                                     lp_build_intrinsic_unary(
> +                                             base->gallivm->builder,
> +                                             "llvm.R600.store.pixel.stencil",
> +                                             
> LLVMVoidTypeInContext(base->gallivm->context),
> +                                             output);
> +                                     break;
> +                             }
> +                     }
>               }
>       }
>  }
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index bf4877a..a97590a 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -522,6 +522,38 @@ static int r600_vtx_from_byte_stream(struct 
> r600_shader_ctx *ctx,
>       return bytes_read;
>  }
>  
> +static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx,
> +     unsigned char * bytes, unsigned bytes_read)
> +{
> +struct r600_bytecode_output output;
> +     memset(&output, 0, sizeof(struct r600_bytecode_output));
> +
> +     output.gpr = bytes[bytes_read++];
> +     output.elem_size = 3;
> +     output.burst_count = 1;
> +     output.barrier = 1;
> +     output.type = -1;
> +     output.end_of_program = 1;
> +     if (bytes[bytes_read++] == 0x27) {
> +             output.inst =
> +                     BC_INST(ctx->bc, 
> V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
> +             output.end_of_program = 0;
> +     } else {
> +             output.inst =
> +                     BC_INST(ctx->bc, 
> V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
> +             output.end_of_program = 1;
> +     }
> +     output.array_base = bytes[bytes_read++];
> +     output.swizzle_x = bytes[bytes_read++];
> +     output.swizzle_y = bytes[bytes_read++];
> +     output.swizzle_z = bytes[bytes_read++];
> +     output.swizzle_w = bytes[bytes_read++];
> +     output.type = 0;
> +
> +     r600_bytecode_add_output(ctx->bc, &output);
> +     return bytes_read;
> +}
> +

As I mentioned in my reply to the previous patch, we should have LLVM emit
the real instruction encoding, and then extract the fields from it to build
the struct r600_bytecode object, just like we already do for VTX
instructions (see r600_vtx_from_byte_stream()).

>  static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
>                               unsigned char * bytes,  unsigned num_bytes)
>  {
> @@ -556,6 +588,10 @@ static void r600_bytecode_from_byte_stream(struct 
> r600_shader_ctx *ctx,
>                       bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
>                                                               bytes_read);
>                       break;
> +             case 5:
> +            bytes_read = r600_export_from_byte_stream(ctx, bytes,
> +                                bytes_read);
> +            break;
>               default:
>                       /* XXX: Error here */
>                       break;
> @@ -1336,7 +1372,11 @@ static int r600_shader_from_tgsi(struct r600_screen 
> *rscreen,
>               radeon_llvm_ctx.two_side = shader->two_side;
>               radeon_llvm_ctx.face_input = ctx.face_gpr;
>               radeon_llvm_ctx.r600_inputs = ctx.shader->input;
> +             radeon_llvm_ctx.r600_outputs = ctx.shader->output;
> +             radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1);
>               radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
> +             radeon_llvm_ctx.fs_color_all = shader->fs_write_all && 
> (rscreen->chip_class >= EVERGREEN);
> +             radeon_llvm_ctx.extra_buffer = key.dual_src_blend;
>               mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
>               if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
>                       dump = 1;
> @@ -1598,6 +1638,33 @@ static int r600_shader_from_tgsi(struct r600_screen 
> *rscreen,
>                       }
>                       break;
>               case TGSI_PROCESSOR_FRAGMENT:
> +             if (use_llvm) {
> +                     if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
> +                             /* never export more colors than the number of 
> CBs */
> +                             if (next_pixel_base && next_pixel_base >= 
> key.nr_cbufs + key.dual_src_blend) {
> +                                     /* skip export */
> +                                     j--;
> +                                     continue;
> +                             }
> +                             output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
> +                             next_pixel_base++;
> +                             shader->nr_ps_color_exports++;
> +                             if (shader->fs_write_all && 
> (rscreen->chip_class >= EVERGREEN)) {
> +                                     for (k = 1; k < key.nr_cbufs; k++) {
> +                                             shader->nr_ps_color_exports++;
> +                                             next_pixel_base++;
> +                                     }
> +                             }
> +                     } else if (shader->output[i].name == 
> TGSI_SEMANTIC_POSITION) {
> +                             continue;
> +                     } else if (shader->output[i].name == 
> TGSI_SEMANTIC_STENCIL) {
> +                             continue;
> +                     } else {
> +                             R600_ERR("unsupported fragment output name 
> %d\n", shader->output[i].name);
> +                             r = -EINVAL;
> +                             goto out_err;
> +                     }
> +             } else {
>                       if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
>                               /* never export more colors than the number of 
> CBs */
>                               if (next_pixel_base && next_pixel_base >= 
> key.nr_cbufs + key.dual_src_blend) {

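Is there a cleaner way to separate the LLVM and non-LLVM functionality?
There is a lot of duplicated code here: the TGSI_SEMANTIC_COLOR handling in
the use_llvm branch appears to start out as a copy of the non-LLVM branch
that follows it.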

> @@ -1644,6 +1711,7 @@ static int r600_shader_from_tgsi(struct r600_screen 
> *rscreen,
>                               r = -EINVAL;
>                               goto out_err;
>                       }
> +             }
>                       break;
>               default:
>                       R600_ERR("unsupported processor type %d\n", ctx.type);
> @@ -1706,10 +1774,13 @@ static int r600_shader_from_tgsi(struct r600_screen 
> *rscreen,
>               }
>       }
>       /* add output to bytecode */
> -     for (i = 0; i < noutput; i++) {
> -             r = r600_bytecode_add_output(ctx.bc, &output[i]);
> -             if (r)
> -                     goto out_err;
> +     if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT ||
> +             (ctx.type == TGSI_PROCESSOR_FRAGMENT && next_pixel_base == 0)) {
> +             for (i = 0; i < noutput; i++) {
> +                     r = r600_bytecode_add_output(ctx.bc, &output[i]);
> +                     if (r)
> +                             goto out_err;
> +             }
>       }
>       /* add program end */
>       if (ctx.bc->chip_class == CAYMAN)
> diff --git a/src/gallium/drivers/radeon/radeon_llvm.h 
> b/src/gallium/drivers/radeon/radeon_llvm.h
> index 6118b11..948e8cf 100644
> --- a/src/gallium/drivers/radeon/radeon_llvm.h
> +++ b/src/gallium/drivers/radeon/radeon_llvm.h
> @@ -59,6 +59,10 @@ struct radeon_llvm_context {
>       unsigned face_input;
>       unsigned two_side;
>       struct r600_shader_io * r600_inputs;
> +     struct r600_shader_io * r600_outputs;
> +     unsigned color_buffer_count;
> +     unsigned extra_buffer;
> +     unsigned fs_color_all;
>  
>       /*=== Front end configuration ===*/
>  
> -- 
> 1.7.11.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to