There is already a function called si_shader_create, so I don't think it would be nice to add si_create_shader as well. Can we choose better names here (for both functions, if needed)?
Marek On Mon, Oct 6, 2014 at 9:44 PM, Tom Stellard <thomas.stell...@amd.com> wrote: > --- > src/gallium/drivers/radeonsi/si_compute.c | 51 +++++---------- > src/gallium/drivers/radeonsi/si_pipe.c | 2 +- > src/gallium/drivers/radeonsi/si_shader.c | 104 > ++++++++++++++++++------------ > src/gallium/drivers/radeonsi/si_shader.h | 7 ++ > 4 files changed, 88 insertions(+), 76 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_compute.c > b/src/gallium/drivers/radeonsi/si_compute.c > index 490845b..a133380 100644 > --- a/src/gallium/drivers/radeonsi/si_compute.c > +++ b/src/gallium/drivers/radeonsi/si_compute.c > @@ -23,14 +23,14 @@ > */ > > #include "util/u_memory.h" > +#include "radeon/r600_pipe_common.h" > +#include "radeon/radeon_elf_util.h" > > #include "radeon/r600_cs.h" > #include "si_pipe.h" > #include "si_shader.h" > #include "sid.h" > > -#include "radeon/radeon_llvm_util.h" > - > #define MAX_GLOBAL_BUFFERS 20 > #define NUM_USER_SGPRS 4 > > @@ -40,14 +40,12 @@ struct si_compute { > unsigned local_size; > unsigned private_size; > unsigned input_size; > - unsigned num_kernels; > - struct si_shader *kernels; > + struct radeon_shader_binary binary; > + struct si_shader program; > unsigned num_user_sgprs; > > struct r600_resource *input_buffer; > struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS]; > - > - LLVMContextRef llvm_ctx; > }; > > static void *si_create_compute_state( > @@ -57,10 +55,7 @@ static void *si_create_compute_state( > struct si_context *sctx = (struct si_context *)ctx; > struct si_compute *program = CALLOC_STRUCT(si_compute); > const struct pipe_llvm_program_header *header; > - const unsigned char *code; > - unsigned i; > - > - program->llvm_ctx = LLVMContextCreate(); > + const char *code; > > header = cso->prog; > code = cso->prog + sizeof(struct pipe_llvm_program_header); > @@ -70,16 +65,9 @@ static void *si_create_compute_state( > program->private_size = cso->req_private_mem; > program->input_size = cso->req_input_mem; > > - 
program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx, > code, > - header->num_bytes); > - program->kernels = CALLOC(sizeof(struct si_shader), > - program->num_kernels); > - for (i = 0; i < program->num_kernels; i++) { > - LLVMModuleRef mod = > radeon_llvm_get_kernel_module(program->llvm_ctx, i, > - code, > header->num_bytes); > - si_compile_llvm(sctx->screen, &program->kernels[i], mod); > - LLVMDisposeModule(mod); > - } > + memset(&program->binary, 0, sizeof(program->binary)); > + radeon_elf_read(code, header->num_bytes, &program->binary, true); > + si_create_shader(sctx->screen, &program->program, &program->binary); > > program->input_buffer = si_resource_create_custom(sctx->b.b.screen, > PIPE_USAGE_IMMUTABLE, program->input_size); > @@ -177,7 +165,7 @@ static void si_launch_grid( > uint64_t shader_va; > unsigned arg_user_sgpr_count = NUM_USER_SGPRS; > unsigned i; > - struct si_shader *shader = &program->kernels[pc]; > + struct si_shader *shader = &program->program; > unsigned lds_blocks; > unsigned num_waves_for_scratch; > > @@ -194,6 +182,9 @@ static void si_launch_grid( > > pm4->compute_pkt = true; > > + /* Read the config informatio */ > + si_shader_binary_read_config(&program->binary, &program->program, pc); > + > /* Upload the kernel arguments */ > > /* The extra num_work_size_bytes are for work group / work item size > information */ > @@ -285,7 +276,7 @@ static void si_launch_grid( > 0x190 /* Default value */); > } > > - shader_va = shader->bo->gpu_address; > + shader_va = shader->bo->gpu_address + pc; > si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, > RADEON_PRIO_SHADER_DATA); > si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & > 0xffffffff); > si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40); > @@ -384,22 +375,12 @@ static void si_delete_compute_state(struct pipe_context > *ctx, void* state){ > return; > } > > - if (program->kernels) { > - for (int i = 0; i < program->num_kernels; i++){ > - if 
(program->kernels[i].bo){ > - si_shader_destroy(ctx, &program->kernels[i]); > - } > - } > - FREE(program->kernels); > - } > - > - if (program->llvm_ctx){ > - LLVMContextDispose(program->llvm_ctx); > - } > pipe_resource_reference( > (struct pipe_resource **)&program->input_buffer, NULL); > > - //And then free the program itself. > + FREE(program->binary.code); > + FREE(program->binary.config); > + FREE(program->binary.rodata); > FREE(program); > } > > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c > b/src/gallium/drivers/radeonsi/si_pipe.c > index 2cce5cc..ad6f518 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.c > +++ b/src/gallium/drivers/radeonsi/si_pipe.c > @@ -334,7 +334,7 @@ static int si_get_shader_param(struct pipe_screen* > pscreen, unsigned shader, enu > case PIPE_SHADER_COMPUTE: > switch (param) { > case PIPE_SHADER_CAP_PREFERRED_IR: > - return PIPE_SHADER_IR_LLVM; > + return PIPE_SHADER_IR_NATIVE; > case PIPE_SHADER_CAP_DOUBLES: > return 0; /* XXX: Enable doubles once the compiler can > handle them. 
*/ > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index 9d2cc80..401da1b 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -33,6 +33,7 @@ > #include "gallivm/lp_bld_arit.h" > #include "gallivm/lp_bld_flow.h" > #include "radeon/radeon_llvm.h" > +#include "radeon/radeon_elf_util.h" > #include "radeon/radeon_llvm_emit.h" > #include "util/u_memory.h" > #include "tgsi/tgsi_parse.h" > @@ -2625,52 +2626,34 @@ static void preload_streamout_buffers(struct > si_shader_context *si_shader_ctx) > } > } > > -int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, > - LLVMModuleRef mod) > +void si_shader_binary_read_config(const struct radeon_shader_binary *binary, > + struct si_shader *shader, > + unsigned symbol_offset) > { > - unsigned r; /* llvm_compile result */ > unsigned i; > - unsigned char *ptr; > - struct radeon_shader_binary binary; > - bool dump = r600_can_dump_shader(&sscreen->b, > - shader->selector ? shader->selector->tokens : NULL); > - const char * gpu_family = > r600_get_llvm_processor_name(sscreen->b.family); > - unsigned code_size; > - > - /* Use LLVM to compile shader */ > - memset(&binary, 0, sizeof(binary)); > - r = radeon_llvm_compile(mod, &binary, gpu_family, dump); > - > - /* Output binary dump if rscreen->debug_flags are set */ > - if (dump && ! binary.disassembled) { > - fprintf(stderr, "SI CODE:\n"); > - for (i = 0; i < binary.code_size; i+=4 ) { > - fprintf(stderr, "%02x%02x%02x%02x\n", binary.code[i + > 3], > - binary.code[i + 2], binary.code[i + 1], > - binary.code[i]); > - } > - } > + const unsigned char *config = > + radeon_shader_binary_config_start(binary, symbol_offset); > > /* XXX: We may be able to emit some of these values directly rather > than > * extracting fields to be emitted later. 
> */ > - /* Parse config data in compiled binary */ > - for (i = 0; i < binary.config_size; i+= 8) { > - unsigned reg = util_le32_to_cpu(*(uint32_t*)(binary.config + > i)); > - unsigned value = util_le32_to_cpu(*(uint32_t*)(binary.config > + i + 4)); > + > + for (i = 0; i < binary->config_size_per_symbol; i+= 8) { > + unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i)); > + unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + > 4)); > switch (reg) { > case R_00B028_SPI_SHADER_PGM_RSRC1_PS: > case R_00B128_SPI_SHADER_PGM_RSRC1_VS: > case R_00B228_SPI_SHADER_PGM_RSRC1_GS: > case R_00B848_COMPUTE_PGM_RSRC1: > - shader->num_sgprs = (G_00B028_SGPRS(value) + 1) * 8; > - shader->num_vgprs = (G_00B028_VGPRS(value) + 1) * 4; > + shader->num_sgprs = MAX2(shader->num_sgprs, > (G_00B028_SGPRS(value) + 1) * 8); > + shader->num_vgprs = MAX2(shader->num_vgprs, > (G_00B028_VGPRS(value) + 1) * 4); > break; > case R_00B02C_SPI_SHADER_PGM_RSRC2_PS: > - shader->lds_size = G_00B02C_EXTRA_LDS_SIZE(value); > + shader->lds_size = MAX2(shader->lds_size, > G_00B02C_EXTRA_LDS_SIZE(value)); > break; > case R_00B84C_COMPUTE_PGM_RSRC2: > - shader->lds_size = G_00B84C_LDS_SIZE(value); > + shader->lds_size = MAX2(shader->lds_size, > G_00B84C_LDS_SIZE(value)); > break; > case R_0286CC_SPI_PS_INPUT_ENA: > shader->spi_ps_input_ena = value; > @@ -2686,9 +2669,32 @@ int si_compile_llvm(struct si_screen *sscreen, struct > si_shader *shader, > break; > } > } > +} > + > +int si_create_shader(struct si_screen *sscreen, > + struct si_shader *shader, > + const struct radeon_shader_binary *binary) > +{ > + > + unsigned i; > + unsigned code_size; > + unsigned char *ptr; > + bool dump = r600_can_dump_shader(&sscreen->b, > + shader->selector ? 
shader->selector->tokens : NULL); > + > + if (dump && !binary->disassembled) { > + fprintf(stderr, "SI CODE:\n"); > + for (i = 0; i < binary->code_size; i+=4 ) { > + fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, > binary->code[i + 3], > + binary->code[i + 2], binary->code[i + 1], > + binary->code[i]); > + } > + } > + > + si_shader_binary_read_config(binary, shader, 0); > > /* copy new shader */ > - code_size = binary.code_size + binary.rodata_size; > + code_size = binary->code_size + binary->rodata_size; > r600_resource_reference(&shader->bo, NULL); > shader->bo = si_resource_create_custom(&sscreen->b.b, > PIPE_USAGE_IMMUTABLE, > code_size); > @@ -2696,19 +2702,37 @@ int si_compile_llvm(struct si_screen *sscreen, struct > si_shader *shader, > return -ENOMEM; > } > > - ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, > PIPE_TRANSFER_WRITE); > - util_memcpy_cpu_to_le32(ptr, binary.code, binary.code_size); > - if (binary.rodata_size > 0) { > - ptr += binary.code_size; > - util_memcpy_cpu_to_le32(ptr, binary.rodata, > binary.rodata_size); > + > + ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, > PIPE_TRANSFER_READ_WRITE); > + util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size); > + if (binary->rodata_size > 0) { > + ptr += binary->code_size; > + util_memcpy_cpu_to_le32(ptr, binary->rodata, > binary->rodata_size); > } > > sscreen->b.ws->buffer_unmap(shader->bo->cs_buf); > > - free(binary.code); > - free(binary.config); > - free(binary.rodata); > + return 0; > +} > + > +int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, > + LLVMModuleRef mod) > +{ > + int r = 0; > + struct radeon_shader_binary binary; > + bool dump = r600_can_dump_shader(&sscreen->b, > + shader->selector ? 
shader->selector->tokens : NULL); > + memset(&binary, 0, sizeof(binary)); > + r = radeon_llvm_compile(mod, &binary, > + r600_get_llvm_processor_name(sscreen->b.family), dump); > > + if (r) { > + return r; > + } > + r = si_create_shader(sscreen, shader, &binary); > + FREE(binary.code); > + FREE(binary.config); > + FREE(binary.rodata); > return r; > } > > diff --git a/src/gallium/drivers/radeonsi/si_shader.h > b/src/gallium/drivers/radeonsi/si_shader.h > index d8a63df..c616bc4 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.h > +++ b/src/gallium/drivers/radeonsi/si_shader.h > @@ -31,6 +31,8 @@ > > #include <llvm-c/Core.h> /* LLVMModuleRef */ > > +struct radeon_shader_binary; > + > #define SI_SGPR_CONST 0 > #define SI_SGPR_SAMPLER 2 > #define SI_SGPR_RESOURCE 4 > @@ -204,5 +206,10 @@ int si_shader_create(struct si_screen *sscreen, struct > si_shader *shader); > int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader, > LLVMModuleRef mod); > void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader); > +int si_create_shader(struct si_screen *sscreen, struct si_shader *shader, > + const struct radeon_shader_binary *binary); > +void si_shader_binary_read_config(const struct radeon_shader_binary *binary, > + struct si_shader *shader, > + unsigned symbol_offset); > > #endif > -- > 1.8.5.5 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev