[Mesa-dev] [PATCH v2 13/73] ac, radeonsi: move some VS input descriptions to ac_shader_abi

Nicolai Hähnle Wed, 05 Jul 2017 03:50:19 -0700

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

v2: use LLVM values instead of function parameter indices


Reviewed-by: Marek Olšák <marek.ol...@amd.com> (v1)
---
 src/amd/common/ac_shader_abi.h                    | 40 +++++++++++++++
 src/gallium/drivers/radeonsi/si_shader.c          | 60 +++++++++++++----------
 src/gallium/drivers/radeonsi/si_shader_internal.h |  8 ++-
 3 files changed, 77 insertions(+), 31 deletions(-)
 create mode 100644 src/amd/common/ac_shader_abi.h

diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
new file mode 100644
index 0000000..e10550b
--- /dev/null
+++ b/src/amd/common/ac_shader_abi.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef AC_SHADER_ABI_H
+#define AC_SHADER_ABI_H
+
+#include <llvm-c/Core.h>
+
+/* Document the shader ABI during compilation. This is what allows radeonsi and
+ * radv to share a compiler backend.
+ */
+struct ac_shader_abi {
+       LLVMValueRef base_vertex;
+       LLVMValueRef start_instance;
+       LLVMValueRef draw_id;
+       LLVMValueRef vertex_id;
+       LLVMValueRef instance_id;
+};
+
+#endif /* AC_SHADER_ABI_H */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 28923e4..d8bacdb 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -60,20 +60,21 @@ struct si_shader_output_values
        unsigned semantic_index;
        ubyte vertex_stream[4];
 };
 
 /**
  * Used to collect types and other info about arguments of the LLVM function
  * before the function is created.
  */
 struct si_function_info {
        LLVMTypeRef types[100];
+       LLVMValueRef *assign[100];
        unsigned num_sgpr_params;
        unsigned num_params;
 };
 
 enum si_arg_regfile {
        ARG_SGPR,
        ARG_VGPR
 };
 
 static void si_init_shader_ctx(struct si_shader_context *ctx,
@@ -118,35 +119,43 @@ static bool is_merged_shader(struct si_shader *shader)
               shader->selector->type == PIPE_SHADER_TESS_CTRL ||
               shader->selector->type == PIPE_SHADER_GEOMETRY;
 }
 
 static void si_init_function_info(struct si_function_info *fninfo)
 {
        fninfo->num_params = 0;
        fninfo->num_sgpr_params = 0;
 }
 
-static unsigned add_arg(struct si_function_info *fninfo,
-                       enum si_arg_regfile regfile, LLVMTypeRef type)
+static unsigned add_arg_assign(struct si_function_info *fninfo,
+                       enum si_arg_regfile regfile, LLVMTypeRef type,
+                       LLVMValueRef *assign)
 {
        assert(regfile != ARG_SGPR || fninfo->num_sgpr_params == 
fninfo->num_params);
 
        unsigned idx = fninfo->num_params++;
        assert(idx < ARRAY_SIZE(fninfo->types));
 
        if (regfile == ARG_SGPR)
                fninfo->num_sgpr_params = fninfo->num_params;
 
        fninfo->types[idx] = type;
+       fninfo->assign[idx] = assign;
        return idx;
 }
 
+static unsigned add_arg(struct si_function_info *fninfo,
+                       enum si_arg_regfile regfile, LLVMTypeRef type)
+{
+       return add_arg_assign(fninfo, regfile, type, NULL);
+}
+
 static void add_arg_checked(struct si_function_info *fninfo,
                            enum si_arg_regfile regfile, LLVMTypeRef type,
                            unsigned idx)
 {
        MAYBE_UNUSED unsigned actual = add_arg(fninfo, regfile, type);
        assert(actual == idx);
 }
 
 /**
  * Returns a unique index for a per-patch semantic name and index. The index
@@ -353,22 +362,21 @@ get_tcs_out_current_patch_data_offset(struct 
si_shader_context *ctx)
                                         rel_patch_id, ""),
                            "");
 }
 
 static LLVMValueRef get_instance_index_for_fetch(
        struct si_shader_context *ctx,
        unsigned param_start_instance, LLVMValueRef divisor)
 {
        struct gallivm_state *gallivm = &ctx->gallivm;
 
-       LLVMValueRef result = LLVMGetParam(ctx->main_fn,
-                                          ctx->param_instance_id);
+       LLVMValueRef result = ctx->abi.instance_id;
 
        /* The division must be done before START_INSTANCE is added. */
        if (divisor != ctx->i32_1)
                result = LLVMBuildUDiv(gallivm->builder, result, divisor, "");
 
        return LLVMBuildAdd(gallivm->builder, result,
                            LLVMGetParam(ctx->main_fn, param_start_instance), 
"");
 }
 
 /* Bitcast <4 x float> to <2 x double>, extract the component, and convert
@@ -1463,30 +1471,27 @@ static void declare_system_value(struct 
si_shader_context *ctx,
                                 const struct tgsi_full_declaration *decl)
 {
        struct lp_build_context *bld = &ctx->bld_base.base;
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef value = 0;
 
        assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES);
 
        switch (decl->Semantic.Name) {
        case TGSI_SEMANTIC_INSTANCEID:
-               value = LLVMGetParam(ctx->main_fn,
-                                    ctx->param_instance_id);
+               value = ctx->abi.instance_id;
                break;
 
        case TGSI_SEMANTIC_VERTEXID:
                value = LLVMBuildAdd(gallivm->builder,
-                                    LLVMGetParam(ctx->main_fn,
-                                                 ctx->param_vertex_id),
-                                    LLVMGetParam(ctx->main_fn,
-                                                 ctx->param_base_vertex), "");
+                                    ctx->abi.vertex_id,
+                                    ctx->abi.base_vertex, "");
                break;
 
        case TGSI_SEMANTIC_VERTEXID_NOBASE:
                /* Unused. Clarify the meaning in indexed vs. non-indexed
                 * draws if this is ever used again. */
                assert(false);
                break;
 
        case TGSI_SEMANTIC_BASEVERTEX:
        {
@@ -1494,31 +1499,30 @@ static void declare_system_value(struct 
si_shader_context *ctx,
                 * (for direct draws) or the CP (for indirect draws) is the
                 * first vertex ID, but GLSL expects 0 to be returned.
                 */
                LLVMValueRef vs_state = LLVMGetParam(ctx->main_fn, 
ctx->param_vs_state_bits);
                LLVMValueRef indexed;
 
                indexed = LLVMBuildLShr(gallivm->builder, vs_state, ctx->i32_1, 
"");
                indexed = LLVMBuildTrunc(gallivm->builder, indexed, ctx->i1, 
"");
 
                value = LLVMBuildSelect(gallivm->builder, indexed,
-                                       LLVMGetParam(ctx->main_fn, 
ctx->param_base_vertex),
-                                       ctx->i32_0, "");
+                                       ctx->abi.base_vertex, ctx->i32_0, "");
                break;
        }
 
        case TGSI_SEMANTIC_BASEINSTANCE:
-               value = LLVMGetParam(ctx->main_fn, ctx->param_start_instance);
+               value = ctx->abi.start_instance;
                break;
 
        case TGSI_SEMANTIC_DRAWID:
-               value = LLVMGetParam(ctx->main_fn, ctx->param_draw_id);
+               value = ctx->abi.draw_id;
                break;
 
        case TGSI_SEMANTIC_INVOCATIONID:
                if (ctx->type == PIPE_SHADER_TESS_CTRL)
                        value = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
                else if (ctx->type == PIPE_SHADER_GEOMETRY)
                        value = LLVMGetParam(ctx->main_fn,
                                             ctx->param_gs_instance_id);
                else
                        assert(!"INVOCATIONID not implemented");
@@ -4006,20 +4010,25 @@ static void si_create_function(struct si_shader_context 
*ctx,
                 * SGPR spilling significantly.
                 */
                if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
                        lp_add_function_attr(ctx->main_fn, i + 1, 
LP_FUNC_ATTR_BYVAL);
                        lp_add_function_attr(ctx->main_fn, i + 1, 
LP_FUNC_ATTR_NOALIAS);
                        ac_add_attr_dereferenceable(P, UINT64_MAX);
                } else
                        lp_add_function_attr(ctx->main_fn, i + 1, 
LP_FUNC_ATTR_INREG);
        }
 
+       for (i = 0; i < fninfo->num_params; ++i) {
+               if (fninfo->assign[i])
+                       *fninfo->assign[i] = LLVMGetParam(ctx->main_fn, i);
+       }
+
        if (max_workgroup_size) {
                si_llvm_add_attribute(ctx->main_fn, 
"amdgpu-max-work-group-size",
                                      max_workgroup_size);
        }
        LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
                                           "no-signed-zeros-fp-math",
                                           "true");
 
        if (ctx->screen->b.debug_flags & DBG_UNSAFE_MATH) {
                /* These were copied from some LLVM test. */
@@ -4153,38 +4162,38 @@ static void declare_default_desc_pointers(struct 
si_shader_context *ctx,
        ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR,
                si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS));
        declare_per_stage_desc_pointers(ctx, fninfo, true);
 }
 
 static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx,
                                            struct si_function_info *fninfo)
 {
        ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR,
                si_const_array(ctx->v4i32, SI_NUM_VERTEX_BUFFERS));
-       ctx->param_base_vertex = add_arg(fninfo, ARG_SGPR, ctx->i32);
-       ctx->param_start_instance = add_arg(fninfo, ARG_SGPR, ctx->i32);
-       ctx->param_draw_id = add_arg(fninfo, ARG_SGPR, ctx->i32);
+       add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex);
+       add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance);
+       add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id);
        ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32);
 }
 
 static void declare_vs_input_vgprs(struct si_shader_context *ctx,
                                   struct si_function_info *fninfo,
                                   unsigned *num_prolog_vgprs)
 {
        struct si_shader *shader = ctx->shader;
 
-       ctx->param_vertex_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
+       add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.vertex_id);
        if (shader->key.as_ls) {
                ctx->param_rel_auto_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
-               ctx->param_instance_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
+               add_arg_assign(fninfo, ARG_VGPR, ctx->i32, 
&ctx->abi.instance_id);
        } else {
-               ctx->param_instance_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
+               add_arg_assign(fninfo, ARG_VGPR, ctx->i32, 
&ctx->abi.instance_id);
                ctx->param_vs_prim_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
        }
        add_arg(fninfo, ARG_VGPR, ctx->i32); /* unused */
 
        if (!shader->is_gs_copy_shader) {
                /* Vertex load indices. */
                ctx->param_vertex_index0 = fninfo->num_params;
                for (unsigned i = 0; i < shader->selector->info.num_inputs; i++)
                        add_arg(fninfo, ARG_VGPR, ctx->i32);
                *num_prolog_vgprs += shader->selector->info.num_inputs;
@@ -5196,22 +5205,21 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
        si_init_shader_ctx(&ctx, sscreen, tm);
        ctx.shader = shader;
        ctx.type = PIPE_SHADER_VERTEX;
 
        builder = gallivm->builder;
 
        create_function(&ctx);
        preload_ring_buffers(&ctx);
 
        LLVMValueRef voffset =
-               lp_build_mul_imm(uint, LLVMGetParam(ctx.main_fn,
-                                                   ctx.param_vertex_id), 4);
+               lp_build_mul_imm(uint, ctx.abi.vertex_id, 4);
 
        /* Fetch the vertex stream ID.*/
        LLVMValueRef stream_id;
 
        if (gs_selector->so.num_outputs)
                stream_id = unpack_param(&ctx, ctx.param_streamout_config, 24, 
2);
        else
                stream_id = ctx.i32_0;
 
        /* Fill in output information. */
@@ -6594,42 +6602,42 @@ static void si_build_vs_prolog_function(struct 
si_shader_context *ctx,
        LLVMTypeRef *returns;
        LLVMValueRef ret, func;
        int num_returns, i;
        unsigned first_vs_vgpr = key->vs_prolog.num_input_sgprs +
                                 key->vs_prolog.num_merged_next_stage_vgprs;
        unsigned num_input_vgprs = key->vs_prolog.num_merged_next_stage_vgprs + 
4;
        unsigned num_all_input_regs = key->vs_prolog.num_input_sgprs +
                                      num_input_vgprs;
        unsigned user_sgpr_base = key->vs_prolog.num_merged_next_stage_vgprs ? 
8 : 0;
 
-       ctx->param_vertex_id = first_vs_vgpr;
-       ctx->param_instance_id = first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1);
-
        si_init_function_info(&fninfo);
 
        /* 4 preloaded VGPRs + vertex load indices as prolog outputs */
        returns = alloca((num_all_input_regs + key->vs_prolog.last_input + 1) *
                         sizeof(LLVMTypeRef));
        num_returns = 0;
 
        /* Declare input and output SGPRs. */
        for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
                add_arg(&fninfo, ARG_SGPR, ctx->i32);
                returns[num_returns++] = ctx->i32;
        }
 
        /* Preloaded VGPRs (outputs must be floats) */
        for (i = 0; i < num_input_vgprs; i++) {
                add_arg(&fninfo, ARG_VGPR, ctx->i32);
                returns[num_returns++] = ctx->f32;
        }
 
+       fninfo.assign[first_vs_vgpr] = &ctx->abi.vertex_id;
+       fninfo.assign[first_vs_vgpr + (key->vs_prolog.as_ls ? 2 : 1)] = 
&ctx->abi.instance_id;
+
        /* Vertex load indices. */
        for (i = 0; i <= key->vs_prolog.last_input; i++)
                returns[num_returns++] = ctx->f32;
 
        /* Create the function. */
        si_create_function(ctx, "vs_prolog", returns, num_returns, &fninfo, 0);
        func = ctx->main_fn;
 
        if (key->vs_prolog.num_merged_next_stage_vgprs &&
            !key->vs_prolog.is_monolithic)
@@ -6678,21 +6686,21 @@ static void si_build_vs_prolog_function(struct 
si_shader_context *ctx,
                        }
 
                        /* InstanceID / Divisor + StartInstance */
                        index = get_instance_index_for_fetch(ctx,
                                                             user_sgpr_base +
                                                             
SI_SGPR_START_INSTANCE,
                                                             divisor);
                } else {
                        /* VertexID + BaseVertex */
                        index = LLVMBuildAdd(gallivm->builder,
-                                            LLVMGetParam(func, 
ctx->param_vertex_id),
+                                            ctx->abi.vertex_id,
                                             LLVMGetParam(func, user_sgpr_base +
                                                                
SI_SGPR_BASE_VERTEX), "");
                }
 
                index = LLVMBuildBitCast(gallivm->builder, index, ctx->f32, "");
                ret = LLVMBuildInsertValue(gallivm->builder, ret, index,
                                           fninfo.num_params + i, "");
        }
 
        si_llvm_build_ret(ctx, ret);
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h 
b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 3556e69..90a70b1 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -21,20 +21,21 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #ifndef SI_SHADER_PRIVATE_H
 #define SI_SHADER_PRIVATE_H
 
 #include "si_shader.h"
 #include "gallivm/lp_bld_init.h"
 #include "gallivm/lp_bld_tgsi.h"
 #include "tgsi/tgsi_parse.h"
+#include "ac_shader_abi.h"
 #include "ac_llvm_util.h"
 #include "ac_llvm_build.h"
 
 #include <llvm-c/Core.h>
 #include <llvm-c/TargetMachine.h>
 
 struct pipe_debug_callback;
 struct ac_shader_binary;
 
 #define RADEON_LLVM_MAX_INPUT_SLOTS 32
@@ -59,20 +60,22 @@ struct si_shader_context {
 
        /* For clamping the non-constant index in resource indexing: */
        unsigned num_const_buffers;
        unsigned num_shader_buffers;
        unsigned num_images;
        unsigned num_samplers;
 
        /* Whether the prolog will be compiled separately. */
        bool separate_prolog;
 
+       struct ac_shader_abi abi;
+
        /** This function is responsible for initilizing the inputs array and 
will be
          * called once for each input declared in the TGSI shader.
          */
        void (*load_input)(struct si_shader_context *,
                           unsigned input_index,
                           const struct tgsi_full_declaration *decl,
                           LLVMValueRef out[4]);
 
        void (*load_system_value)(struct si_shader_context *,
                                  unsigned index,
@@ -115,27 +118,22 @@ struct si_shader_context {
 
        /* Parameter indices for LLVMGetParam. */
        int param_rw_buffers;
        int param_const_and_shader_buffers;
        int param_samplers_and_images;
        /* Common inputs for merged shaders. */
        int param_merged_wave_info;
        int param_merged_scratch_offset;
        /* API VS */
        int param_vertex_buffers;
-       int param_base_vertex;
-       int param_start_instance;
-       int param_draw_id;
-       int param_vertex_id;
        int param_rel_auto_id;
        int param_vs_prim_id;
-       int param_instance_id;
        int param_vertex_index0;
        /* VS states and layout of LS outputs / TCS inputs at the end
         *   [0] = clamp vertex color
         *   [1] = indexed
         *   [8:20] = stride between patches in DW = num_inputs * num_vertices 
* 4
         *            max = 32*32*4 + 32*4
         *   [24:31] = stride between vertices in DW = num_inputs * 4
         *             max = 32*4
         */
        int param_vs_state_bits;
-- 
2.9.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 13/73] ac, radeonsi: move some VS input descriptions to ac_shader_abi

Reply via email to