reserve buffer id 2 for rodata Signed-off-by: Jan Vesely <jan.ves...@rutgers.edu> ---
This needs corresponding LLVM changes (see [0], also posted) to work, but it does not break anything (except waste a buffer) without them. Both series fix table-based builtins (acosh, atanh, cosh, log1p), as well as gegl's format conversion kernels. There's one more problem that might need to be addressed on mesa's side: The generated code can have more than one rodata section (like rodata.cst16, rodata.cst4). Right now radeon_elf_read only keeps the last section (leaking the previous ones). We can either concatenate the sections in mesa, or instruct llvm to put everything in one section. The cbrt builtin and the updated (posted) program-scope-arrays piglit hit this. Suggestions welcome. jan [0] https://github.com/jvesely/llvm src/gallium/drivers/r600/evergreen_compute.c | 32 +++++++++++++++++++--- .../drivers/r600/evergreen_compute_internal.h | 1 + src/gallium/drivers/r600/r600_asm.h | 2 ++ src/gallium/drivers/r600/r600_llvm.c | 3 ++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 4b2d780..25f5f7d 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -229,11 +229,27 @@ void *evergreen_create_compute_state( radeon_elf_read(code, header->num_bytes, &shader->binary, true); r600_create_shader(&shader->bc, &shader->binary, &use_kill); + /* Upload code */ shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen, - shader->bc.ndw * 4); + shader->bc.ndw * 4); p = r600_buffer_map_sync_with_rings(&ctx->b, shader->code_bo, PIPE_TRANSFER_WRITE); + //TODO: use util_memcpy_cpu_to_le32 ? 
memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4); ctx->b.ws->buffer_unmap(shader->code_bo->cs_buf); + p = NULL; + + /* Upload const data */ + if (shader->bc.nrodb) { + shader->const_bo = r600_compute_buffer_alloc_vram(ctx->screen, + shader->bc.nrodb); + p = r600_buffer_map_sync_with_rings(&ctx->b, shader->const_bo, PIPE_TRANSFER_WRITE); + //TODO: use util_memcpy_cpu_to_le32 ? + memcpy(p, shader->bc.rodata, shader->bc.nrodb); + ctx->b.ws->buffer_unmap(shader->const_bo->cs_buf); + p = NULL; + } else { + shader->const_bo = NULL; + } #endif #endif @@ -628,9 +644,9 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_, start, count); for (unsigned i = 0; i < count; i++) { - /* The First two vertex buffers are reserved for parameters and + /* The First three vertex buffers are reserved for parameters and * global buffers. */ - unsigned vtx_id = 2 + i; + unsigned vtx_id = 3 + i; if (resources[i]) { struct r600_resource_global *buffer = (struct r600_resource_global*) @@ -716,9 +732,17 @@ static void evergreen_set_global_binding( *(handles[i]) = util_cpu_to_le32(handle); } - evergreen_set_rat(ctx->cs_shader_state.shader, 0, pool->bo, 0, pool->size_in_dw * 4); + /* globals for writing */ + evergreen_set_rat(ctx->cs_shader_state.shader, 0, pool->bo, 0, + pool->size_in_dw * 4); + /* globals for reading */ evergreen_cs_set_vertex_buffer(ctx, 1, 0, (struct pipe_resource*)pool->bo); + + /* constants for reading */ + if (ctx->cs_shader_state.shader->const_bo) + evergreen_cs_set_vertex_buffer(ctx, 2, 0, + (struct pipe_resource*)ctx->cs_shader_state.shader->const_bo); } /** diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.h b/src/gallium/drivers/r600/evergreen_compute_internal.h index 95593dd..880a993 100644 --- a/src/gallium/drivers/r600/evergreen_compute_internal.h +++ b/src/gallium/drivers/r600/evergreen_compute_internal.h @@ -51,6 +51,7 @@ struct r600_pipe_compute { struct radeon_shader_binary binary; struct r600_resource *code_bo; + struct 
r600_resource *const_bo; struct r600_bytecode bc; unsigned local_size; diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 4f723c1..df3f7dd 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -208,6 +208,7 @@ struct r600_bytecode { struct list_head cf; struct r600_bytecode_cf *cf_last; unsigned ndw; + unsigned nrodb; unsigned ncf; unsigned ngpr; unsigned nstack; @@ -215,6 +216,7 @@ struct r600_bytecode { unsigned nresource; unsigned force_add_cf; uint32_t *bytecode; + uint32_t *rodata; uint32_t fc_sp; struct r600_cf_stack_entry fc_stack[32]; struct r600_stack_info stack; diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index 72e2dc4..94085fc 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -861,8 +861,11 @@ unsigned r600_create_shader(struct r600_bytecode *bc, { assert(binary->code_size % 4 == 0); bc->bytecode = CALLOC(1, binary->code_size); + bc->rodata = CALLOC(1, binary->rodata_size); memcpy(bc->bytecode, binary->code, binary->code_size); bc->ndw = binary->code_size / 4; + bc->nrodb = binary->rodata_size; + memcpy(bc->rodata, binary->rodata, binary->rodata_size); r600_shader_binary_read_config(binary, bc, 0, use_kill); -- 2.1.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev