On 16.11.2016 16:38, Tom Stellard wrote:
On Wed, Nov 16, 2016 at 11:13:45AM +0100, Nicolai Hähnle wrote:
Have you looked at the shader-db impact?


shader-db is mostly unchanged.  There are a few decreases in SGPR usage and
code size, and a 4 byte increase in code size for one shader.

Okay, in that case you can add my R-b to this patch as well.


I do think we should eventually do this, but llvm.SI.vs.load.input is
ReadNone while llvm.amdgcn.buffer.load.* is only ReadOnly, so as long as we
can't teach LLVM properly about no-aliasing and speculability, there may be
performance regressions.


Ideally llvm.amdgcn.buffer.load.* would be ReadOnly and ArgMemOnly, but I think
as long as it has non-pointer arguments this combination behaves the same as
ReadNone, which would be incorrect.

Agreed. This is something that the "fat" pointers would help with, right?

Cheers,
Nicolai


-Tom

Cheers,
Nicolai

On 16.11.2016 03:14, Tom Stellard wrote:
---
src/gallium/drivers/radeonsi/si_shader.c | 69 +++++++++++++++++++++++---------
1 file changed, 50 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 306e12f..ee4fe2f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -82,6 +82,17 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
static void si_build_ps_epilog_function(struct si_shader_context *ctx,
                                        union si_shader_part_key *key);

+static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
+                                      LLVMValueRef rsrc,
+                                      int num_channels,
+                                      LLVMValueRef vindex,
+                                      LLVMValueRef voffset,
+                                      LLVMValueRef soffset,
+                                      unsigned inst_offset,
+                                      unsigned glc,
+                                      unsigned slc,
+                                     bool is_format);
+
/* Ideally pass the sample mask input to the PS epilog as v13, which
 * is its usual location, so that the shader doesn't have to add v_mov.
 */
@@ -368,6 +379,31 @@ static LLVMValueRef get_instance_index_for_fetch(
                            LLVMGetParam(radeon_bld->main_fn, param_start_instance), "");
}

+static LLVMValueRef build_vs_load_input(struct si_shader_context *ctx,
+                                       LLVMValueRef rsrc,
+                                       LLVMValueRef index,
+                                       LLVMValueRef offset) {
+
+       struct lp_build_context *base = &ctx->soa.bld_base.base;
+       struct lp_build_context *uint = &ctx->soa.bld_base.uint_bld;
+       struct gallivm_state *gallivm = base->gallivm;
+
+       LLVMValueRef args[8];
+
+       if (HAVE_LLVM < 0x0400) {
+               args[0] = rsrc;
+               args[1] = offset;
+               args[2] = index;
+
+               return lp_build_intrinsic(gallivm->builder,
+                       "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
+                       LP_FUNC_ATTR_READNONE);
+       }
+
+       return build_buffer_load(ctx, rsrc, 4, index, offset,
+                                uint->zero, 0, 0, 0, true);
+}
+
static void declare_input_vs(
        struct si_shader_context *ctx,
        unsigned input_index,
@@ -385,7 +421,6 @@ static void declare_input_vs(
        LLVMValueRef t_list;
        LLVMValueRef attribute_offset;
        LLVMValueRef buffer_index;
-       LLVMValueRef args[3];
        LLVMValueRef input;

        /* Load the T list */
@@ -402,12 +437,8 @@ static void declare_input_vs(
                                    ctx->param_vertex_index0 +
                                    input_index);

-       args[0] = t_list;
-       args[1] = attribute_offset;
-       args[2] = buffer_index;
-       input = lp_build_intrinsic(gallivm->builder,
-               "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
-               LP_FUNC_ATTR_READNONE);
+       input = build_vs_load_input(ctx, t_list, buffer_index,
+                                   attribute_offset);

        /* Break up the vec4 into individual components */
        for (chan = 0; chan < 4; chan++) {
@@ -808,7 +839,8 @@ static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
                                      LLVMValueRef soffset,
                                      unsigned inst_offset,
                                      unsigned glc,
-                                      unsigned slc)
+                                      unsigned slc,
+                                     bool is_format)
{
        struct gallivm_state *gallivm = &ctx->gallivm;
        unsigned func = CLAMP(num_channels, 1, 3) - 1;
@@ -837,8 +869,8 @@ static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
                                               "");
                }

-               snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
-                        type_names[func]);
+               snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s%s",
+                        is_format ? "format." : "", type_names[func]);

                return lp_build_intrinsic(gallivm->builder, name, types[func], args,
                                          ARRAY_SIZE(args), LP_FUNC_ATTR_READONLY);
@@ -889,14 +921,14 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,

        if (swizzle == ~0) {
                value = build_buffer_load(ctx, buffer, 4, NULL, base, offset,
-                                         0, 1, 0);
+                                         0, 1, 0, false);

                return LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
        }

        if (!tgsi_type_is_64bit(type)) {
                value = build_buffer_load(ctx, buffer, 4, NULL, base, offset,
-                                         0, 1, 0);
+                                         0, 1, 0, false);

                value = LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
                return LLVMBuildExtractElement(gallivm->builder, value,
@@ -904,10 +936,10 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
        }

        value = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
-                                 swizzle * 4, 1, 0);
+                                 swizzle * 4, 1, 0, false);

        value2 = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
-                                  swizzle * 4 + 4, 1, 0);
+                                  swizzle * 4 + 4, 1, 0, false);

        return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
}
@@ -4779,11 +4811,10 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
        const char *infix = "";

        if (target == TGSI_TEXTURE_BUFFER) {
-               emit_data->output[emit_data->chan] = lp_build_intrinsic(
-                       base->gallivm->builder,
-                       "llvm.SI.vs.load.input", emit_data->dst_type,
-                       emit_data->args, emit_data->arg_count,
-                       LP_FUNC_ATTR_READNONE);
+               emit_data->output[emit_data->chan] =
+                       build_vs_load_input(ctx, emit_data->args[0],
+                                           emit_data->args[2],
+                                           emit_data->args[1]);
                return;
        }


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to