v2:
  - Drop dependency on LLVM >= 3.5.1
  - Rename si_create_shader() to si_shader_binary_read()
---
 src/gallium/drivers/radeonsi/si_compute.c |  79 ++++++++++++++---------
 src/gallium/drivers/radeonsi/si_pipe.c    |   4 ++
 src/gallium/drivers/radeonsi/si_shader.c  | 104 ++++++++++++++++++------------
 src/gallium/drivers/radeonsi/si_shader.h  |   7 ++
 4 files changed, 123 insertions(+), 71 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index be64418..16beca8 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -23,14 +23,15 @@
  */
 
 #include "util/u_memory.h"
+#include "radeon/r600_pipe_common.h"
+#include "radeon/radeon_elf_util.h"
+#include "radeon/radeon_llvm_util.h"
 
 #include "radeon/r600_cs.h"
 #include "si_pipe.h"
 #include "si_shader.h"
 #include "sid.h"
 
-#include "radeon/radeon_llvm_util.h"
-
 #define MAX_GLOBAL_BUFFERS 20
 #if HAVE_LLVM < 0x0305
 #define NUM_USER_SGPRS 2
@@ -44,14 +45,18 @@ struct si_compute {
        unsigned local_size;
        unsigned private_size;
        unsigned input_size;
-       unsigned num_kernels;
-       struct si_shader *kernels;
+       struct radeon_shader_binary binary;
+       struct si_shader program;
        unsigned num_user_sgprs;
 
        struct r600_resource *input_buffer;
        struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
 
+#if HAVE_LLVM < 0x0306
+       unsigned num_kernels;
+       struct si_shader *kernels;
        LLVMContextRef llvm_ctx;
+#endif
 };
 
 static void *si_create_compute_state(
@@ -61,10 +66,7 @@ static void *si_create_compute_state(
        struct si_context *sctx = (struct si_context *)ctx;
        struct si_compute *program = CALLOC_STRUCT(si_compute);
        const struct pipe_llvm_program_header *header;
-       const unsigned char *code;
-       unsigned i;
-
-       program->llvm_ctx = LLVMContextCreate();
+       const char *code;
 
        header = cso->prog;
        code = cso->prog + sizeof(struct pipe_llvm_program_header);
@@ -74,17 +76,28 @@ static void *si_create_compute_state(
        program->private_size = cso->req_private_mem;
        program->input_size = cso->req_input_mem;
 
-       program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx, code,
-                                                       header->num_bytes);
-       program->kernels = CALLOC(sizeof(struct si_shader),
-                                                       program->num_kernels);
-       for (i = 0; i < program->num_kernels; i++) {
-               LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i,
-                                                       code, header->num_bytes);
-               si_compile_llvm(sctx->screen, &program->kernels[i], mod);
-               LLVMDisposeModule(mod);
+#if HAVE_LLVM < 0x0306
+       {
+               unsigned i;
+               program->llvm_ctx = LLVMContextCreate();
+               program->num_kernels = radeon_llvm_get_num_kernels(program->llvm_ctx,
+                                       code, header->num_bytes);
+               program->kernels = CALLOC(sizeof(struct si_shader),
+                                                        program->num_kernels);
+               for (i = 0; i < program->num_kernels; i++) {
+                       LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i,
+                                                        code, header->num_bytes);
+                       si_compile_llvm(sctx->screen, &program->kernels[i], mod);
+                       LLVMDisposeModule(mod);
+               }
        }
+#else
+
+       memset(&program->binary, 0, sizeof(program->binary));
+       radeon_elf_read(code, header->num_bytes, &program->binary, true);
+       si_shader_binary_read(sctx->screen, &program->program, &program->binary);
 
+#endif
        program->input_buffer = si_resource_create_custom(sctx->b.b.screen,
                PIPE_USAGE_IMMUTABLE, program->input_size);
 
@@ -181,10 +194,15 @@ static void si_launch_grid(
        uint64_t shader_va;
        unsigned arg_user_sgpr_count = NUM_USER_SGPRS;
        unsigned i;
-       struct si_shader *shader = &program->kernels[pc];
+       struct si_shader *shader = &program->program;
        unsigned lds_blocks;
        unsigned num_waves_for_scratch;
 
+#if HAVE_LLVM < 0x0306
+       shader = &program->kernels[pc];
+#endif
+
+
        radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0) | PKT3_SHADER_TYPE_S(1));
        radeon_emit(cs, 0x80000000);
        radeon_emit(cs, 0x80000000);
@@ -198,6 +216,11 @@ static void si_launch_grid(
 
        pm4->compute_pkt = true;
 
+#if HAVE_LLVM >= 0x0306
+       /* Read the config information */
+       si_shader_binary_read_config(&program->binary, &program->program, pc);
+#endif
+
        /* Upload the kernel arguments */
 
        /* The extra num_work_size_bytes are for work group / work item size information */
@@ -290,6 +313,10 @@ static void si_launch_grid(
        }
 
        shader_va = shader->bo->gpu_address;
+
+#if HAVE_LLVM >= 0x0306
+       shader_va += pc;
+#endif
        si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);
        si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff);
        si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
@@ -388,22 +415,12 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){
                return;
        }
 
-       if (program->kernels) {
-               for (int i = 0; i < program->num_kernels; i++){
-                       if (program->kernels[i].bo){
-                               si_shader_destroy(ctx, &program->kernels[i]);
-                       }
-               }
-               FREE(program->kernels);
-       }
-
-       if (program->llvm_ctx){
-               LLVMContextDispose(program->llvm_ctx);
-       }
        pipe_resource_reference(
                (struct pipe_resource **)&program->input_buffer, NULL);
 
-       //And then free the program itself.
+       FREE(program->binary.code);
+       FREE(program->binary.config);
+       FREE(program->binary.rodata);
        FREE(program);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index cba6d98..2238c17 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -335,7 +335,11 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
        case PIPE_SHADER_COMPUTE:
                switch (param) {
                case PIPE_SHADER_CAP_PREFERRED_IR:
+#if HAVE_LLVM < 0x0306
                        return PIPE_SHADER_IR_LLVM;
+#else
+                       return PIPE_SHADER_IR_NATIVE;
+#endif
                case PIPE_SHADER_CAP_DOUBLES:
                        return 0; /* XXX: Enable doubles once the compiler can
                                     handle them. */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 6b4f9e6..d15e2f6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -33,6 +33,7 @@
 #include "gallivm/lp_bld_arit.h"
 #include "gallivm/lp_bld_flow.h"
 #include "radeon/radeon_llvm.h"
+#include "radeon/radeon_elf_util.h"
 #include "radeon/radeon_llvm_emit.h"
 #include "util/u_memory.h"
 #include "tgsi/tgsi_parse.h"
@@ -2499,52 +2500,34 @@ static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx)
        }
 }
 
-int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
-                   LLVMModuleRef mod)
+void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
+                               struct si_shader *shader,
+                               unsigned symbol_offset)
 {
-       unsigned r; /* llvm_compile result */
        unsigned i;
-       unsigned char *ptr;
-       struct radeon_shader_binary binary;
-       bool dump = r600_can_dump_shader(&sscreen->b,
-                       shader->selector ? shader->selector->tokens : NULL);
-       const char * gpu_family = r600_get_llvm_processor_name(sscreen->b.family);
-       unsigned code_size;
-
-       /* Use LLVM to compile shader */
-       memset(&binary, 0, sizeof(binary));
-       r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
-
-       /* Output binary dump if rscreen->debug_flags are set */
-       if (dump && ! binary.disassembled) {
-               fprintf(stderr, "SI CODE:\n");
-               for (i = 0; i < binary.code_size; i+=4 ) {
-                       fprintf(stderr, "%02x%02x%02x%02x\n", binary.code[i + 3],
-                               binary.code[i + 2], binary.code[i + 1],
-                               binary.code[i]);
-               }
-       }
+       const unsigned char *config =
+               radeon_shader_binary_config_start(binary, symbol_offset);
 
        /* XXX: We may be able to emit some of these values directly rather than
         * extracting fields to be emitted later.
         */
-       /* Parse config data in compiled binary */
-       for (i = 0; i < binary.config_size; i+= 8) {
-               unsigned reg = util_le32_to_cpu(*(uint32_t*)(binary.config + i));
-               unsigned value = util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4));
+
+       for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
+               unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
+               unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
                switch (reg) {
                case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
                case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
                case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
                case R_00B848_COMPUTE_PGM_RSRC1:
-                       shader->num_sgprs = (G_00B028_SGPRS(value) + 1) * 8;
-                       shader->num_vgprs = (G_00B028_VGPRS(value) + 1) * 4;
+                       shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
+                       shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
                        break;
                case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
-                       shader->lds_size = G_00B02C_EXTRA_LDS_SIZE(value);
+                       shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
                        break;
                case R_00B84C_COMPUTE_PGM_RSRC2:
-                       shader->lds_size = G_00B84C_LDS_SIZE(value);
+                       shader->lds_size = MAX2(shader->lds_size, G_00B84C_LDS_SIZE(value));
                        break;
                case R_0286CC_SPI_PS_INPUT_ENA:
                        shader->spi_ps_input_ena = value;
@@ -2560,9 +2543,32 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
                        break;
                }
        }
+}
+
+int si_shader_binary_read(struct si_screen *sscreen,
+                       struct si_shader *shader,
+                       const struct radeon_shader_binary *binary)
+{
+
+       unsigned i;
+       unsigned code_size;
+       unsigned char *ptr;
+       bool dump  = r600_can_dump_shader(&sscreen->b,
+               shader->selector ? shader->selector->tokens : NULL);
+
+       if (dump && !binary->disassembled) {
+               fprintf(stderr, "SI CODE:\n");
+               for (i = 0; i < binary->code_size; i+=4 ) {
+                       fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
+                               binary->code[i + 2], binary->code[i + 1],
+                               binary->code[i]);
+               }
+       }
+
+       si_shader_binary_read_config(binary, shader, 0);
 
        /* copy new shader */
-       code_size = binary.code_size + binary.rodata_size;
+       code_size = binary->code_size + binary->rodata_size;
        r600_resource_reference(&shader->bo, NULL);
        shader->bo = si_resource_create_custom(&sscreen->b.b, PIPE_USAGE_IMMUTABLE,
                                               code_size);
@@ -2570,19 +2576,37 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
                return -ENOMEM;
        }
 
-       ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_WRITE);
-       util_memcpy_cpu_to_le32(ptr, binary.code, binary.code_size);
-       if (binary.rodata_size > 0) {
-               ptr += binary.code_size;
-               util_memcpy_cpu_to_le32(ptr, binary.rodata, binary.rodata_size);
+
+       ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_READ_WRITE);
+       util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
+       if (binary->rodata_size > 0) {
+               ptr += binary->code_size;
+               util_memcpy_cpu_to_le32(ptr, binary->rodata, binary->rodata_size);
        }
 
        sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
 
-       free(binary.code);
-       free(binary.config);
-       free(binary.rodata);
+       return 0;
+}
+
+int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
+                                                       LLVMModuleRef mod)
+{
+       int r = 0;
+       struct radeon_shader_binary binary;
+       bool dump = r600_can_dump_shader(&sscreen->b,
+                       shader->selector ? shader->selector->tokens : NULL);
+       memset(&binary, 0, sizeof(binary));
+       r = radeon_llvm_compile(mod, &binary,
+               r600_get_llvm_processor_name(sscreen->b.family), dump);
 
+       if (r) {
+               return r;
+       }
+       r = si_shader_binary_read(sscreen, shader, &binary);
+       FREE(binary.code);
+       FREE(binary.config);
+       FREE(binary.rodata);
        return r;
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 30e6854..5e8c9e6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -33,6 +33,8 @@
 #include "tgsi/tgsi_scan.h"
 #include "si_state.h"
 
+struct radeon_shader_binary;
+
 #define SI_SGPR_RW_BUFFERS     0  /* rings (& stream-out, VS only) */
 #define SI_SGPR_CONST          2
 #define SI_SGPR_SAMPLER                4
@@ -180,5 +182,10 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
                    LLVMModuleRef mod);
 void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
+int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader,
+               const struct radeon_shader_binary *binary);
+void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
+                               struct si_shader *shader,
+                               unsigned symbol_offset);
 
 #endif
-- 
1.8.5.5

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to