On 13.09.2016 22:20, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

26011 shaders in 14651 tests
Totals:
SGPRS: 1251920 -> 1152636 (-7.93 %)
VGPRS: 728421 -> 728198 (-0.03 %)
Spilled SGPRs: 16644 -> 3776 (-77.31 %)
Spilled VGPRs: 369 -> 369 (0.00 %)
Scratch VGPRs: 1344 -> 1344 (0.00 %) dwords per thread
Code Size: 36001064 -> 35835152 (-0.46 %) bytes
LDS: 767 -> 767 (0.00 %) blocks
Max Waves: 222221 -> 222372 (0.07 %)
Wait states: 0 -> 0 (0.00 %)

v2: merge codepaths where possible
---
 src/gallium/drivers/radeonsi/si_shader.c | 173 ++++++++-----------------------
 1 file changed, 41 insertions(+), 132 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 84cbfd7..6f9c45f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -100,25 +100,20 @@ struct si_shader_context

        LLVMTargetMachineRef tm;

        unsigned invariant_load_md_kind;
        unsigned range_md_kind;
        unsigned uniform_md_kind;
        LLVMValueRef empty_md;

        /* Preloaded descriptors. */
        LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS];
-       LLVMValueRef shader_buffers[SI_NUM_SHADER_BUFFERS];
-       LLVMValueRef sampler_views[SI_NUM_SAMPLERS];
-       LLVMValueRef sampler_states[SI_NUM_SAMPLERS];
-       LLVMValueRef fmasks[SI_NUM_SAMPLERS];
-       LLVMValueRef images[SI_NUM_IMAGES];
        LLVMValueRef esgs_ring;
        LLVMValueRef gsvs_ring[4];

        LLVMValueRef lds;
        LLVMValueRef gs_next_vertex[4];
        LLVMValueRef return_value;

        LLVMTypeRef voidt;
        LLVMTypeRef i1;
        LLVMTypeRef i8;
@@ -3399,32 +3394,32 @@ static void membar_emit(
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);

        emit_waitcnt(ctx);
 }

 static LLVMValueRef
 shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
                         const struct tgsi_full_src_register *reg)
 {
-       LLVMValueRef ind_index;
-       LLVMValueRef rsrc_ptr;
+       LLVMValueRef index;
+       LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
+                                            SI_PARAM_SHADER_BUFFERS);

        if (!reg->Register.Indirect)
-               return ctx->shader_buffers[reg->Register.Index];
-
-       ind_index = get_bounded_indirect_index(ctx, &reg->Indirect,
-                                              reg->Register.Index,
-                                              SI_NUM_SHADER_BUFFERS);
+               index = LLVMConstInt(ctx->i32, reg->Register.Index, 0);
+       else
+               index = get_bounded_indirect_index(ctx, &reg->Indirect,
+                                                  reg->Register.Index,
+                                                  SI_NUM_SHADER_BUFFERS);

-       rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_SHADER_BUFFERS);
-       return build_indexed_load_const(ctx, rsrc_ptr, ind_index);
+       return build_indexed_load_const(ctx, rsrc_ptr, index);
 }

 static bool tgsi_is_array_sampler(unsigned target)
 {
        return target == TGSI_TEXTURE_1D_ARRAY ||
               target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
               target == TGSI_TEXTURE_2D_ARRAY ||
               target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
               target == TGSI_TEXTURE_CUBE_ARRAY ||
               target == TGSI_TEXTURE_SHADOWCUBE_ARRAY ||
@@ -3473,51 +3468,47 @@ static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
  * Load the resource descriptor for \p image.
  */
 static void
 image_fetch_rsrc(
        struct lp_build_tgsi_context *bld_base,
        const struct tgsi_full_src_register *image,
        bool dcc_off,
        LLVMValueRef *rsrc)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
+       LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
+                                            SI_PARAM_IMAGES);
+       LLVMValueRef index, tmp;

        assert(image->Register.File == TGSI_FILE_IMAGE);

        if (!image->Register.Indirect) {
-               /* Fast path: use preloaded resources */
-               *rsrc = ctx->images[image->Register.Index];
+               index = LLVMConstInt(ctx->i32, image->Register.Index, 0);

I think it would be beneficial to put

        if (info->images_writemask & (1 << image->Register.Index) &&
            !(info->images_buffers & (1 << image->Register.Index)))
                dcc_off = true;

here, so that common subexpression elimination (CSE) can work better when an image is both read from and written to.

Apart from that, the patch is

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

        } else {
-               /* Indexing and manual load */
-               LLVMValueRef ind_index;
-               LLVMValueRef rsrc_ptr;
-               LLVMValueRef tmp;
-
                /* From the GL_ARB_shader_image_load_store extension spec:
                 *
                 *    If a shader performs an image load, store, or atomic
                 *    operation using an image variable declared as an array,
                 *    and if the index used to select an individual element is
                 *    negative or greater than or equal to the size of the
                 *    array, the results of the operation are undefined but may
                 *    not lead to termination.
                 */
-               ind_index = get_bounded_indirect_index(ctx, &image->Indirect,
-                                                      image->Register.Index,
-                                                      SI_NUM_IMAGES);
-
-               rsrc_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_IMAGES);
-               tmp = build_indexed_load_const(ctx, rsrc_ptr, ind_index);
-               if (dcc_off)
-                       tmp = force_dcc_off(ctx, tmp);
-               *rsrc = tmp;
+               index = get_bounded_indirect_index(ctx, &image->Indirect,
+                                                  image->Register.Index,
+                                                  SI_NUM_IMAGES);
        }
+
+       tmp = build_indexed_load_const(ctx, rsrc_ptr, index);
+       if (dcc_off)
+               tmp = force_dcc_off(ctx, tmp);
+       *rsrc = tmp;
 }

 static LLVMValueRef image_fetch_coords(
                struct lp_build_tgsi_context *bld_base,
                const struct tgsi_full_instruction *inst,
                unsigned src)
 {
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        unsigned target = inst->Memory.Texture;
@@ -4355,55 +4346,51 @@ static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx,
 static void tex_fetch_ptrs(
        struct lp_build_tgsi_context *bld_base,
        struct lp_build_emit_data *emit_data,
        LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        const struct tgsi_full_instruction *inst = emit_data->inst;
        unsigned target = inst->Texture.Texture;
        unsigned sampler_src;
        unsigned sampler_index;
+       LLVMValueRef index;

        sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
        sampler_index = emit_data->inst->Src[sampler_src].Register.Index;

        if (emit_data->inst->Src[sampler_src].Register.Indirect) {
                const struct tgsi_full_src_register *reg = &emit_data->inst->Src[sampler_src];
-               LLVMValueRef ind_index;

-               ind_index = get_bounded_indirect_index(ctx,
-                                                      &reg->Indirect,
-                                                      reg->Register.Index,
-                                                      SI_NUM_SAMPLERS);
+               index = get_bounded_indirect_index(ctx,
+                                                  &reg->Indirect,
+                                                  reg->Register.Index,
+                                                  SI_NUM_SAMPLERS);
+       } else {
+               index = LLVMConstInt(ctx->i32, sampler_index, 0);
+       }

-               *res_ptr = load_sampler_desc(ctx, ind_index, DESC_IMAGE);
+       *res_ptr = load_sampler_desc(ctx, index, DESC_IMAGE);

-               if (target == TGSI_TEXTURE_2D_MSAA ||
-                   target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
-                       if (samp_ptr)
-                               *samp_ptr = NULL;
-                       if (fmask_ptr)
-                               *fmask_ptr = load_sampler_desc(ctx, ind_index, DESC_FMASK);
-               } else {
-                       if (samp_ptr) {
-                               *samp_ptr = load_sampler_desc(ctx, ind_index, DESC_SAMPLER);
-                               *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
-                       }
-                       if (fmask_ptr)
-                               *fmask_ptr = NULL;
-               }
-       } else {
-               *res_ptr = ctx->sampler_views[sampler_index];
+       if (target == TGSI_TEXTURE_2D_MSAA ||
+           target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
                if (samp_ptr)
-                       *samp_ptr = ctx->sampler_states[sampler_index];
+                       *samp_ptr = NULL;
                if (fmask_ptr)
-                       *fmask_ptr = ctx->fmasks[sampler_index];
+                       *fmask_ptr = load_sampler_desc(ctx, index, DESC_FMASK);
+       } else {
+               if (samp_ptr) {
+                       *samp_ptr = load_sampler_desc(ctx, index, DESC_SAMPLER);
+                       *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
+               }
+               if (fmask_ptr)
+                       *fmask_ptr = NULL;
        }
 }

 static void txq_fetch_args(
        struct lp_build_tgsi_context *bld_base,
        struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMBuilderRef builder = gallivm->builder;
@@ -5856,95 +5843,20 @@ static void preload_constant_buffers(struct si_shader_context *ctx)
        for (buf = 0; buf < SI_NUM_CONST_BUFFERS; buf++) {
                if (info->const_file_max[buf] == -1)
                        continue;

                /* Load the resource descriptor */
                ctx->const_buffers[buf] =
                        build_indexed_load_const(ctx, ptr, lp_build_const_int32(gallivm, buf));
        }
 }

-static void preload_shader_buffers(struct si_shader_context *ctx)
-{
-       struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
-       LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_SHADER_BUFFERS);
-       int buf, maxbuf;
-
-       maxbuf = MIN2(ctx->shader->selector->info.file_max[TGSI_FILE_BUFFER],
-                     SI_NUM_SHADER_BUFFERS - 1);
-       for (buf = 0; buf <= maxbuf; ++buf) {
-               ctx->shader_buffers[buf] =
-                       build_indexed_load_const(
-                               ctx, ptr, lp_build_const_int32(gallivm, buf));
-       }
-}
-
-static void preload_samplers(struct si_shader_context *ctx)
-{
-       struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
-       const struct tgsi_shader_info *info = bld_base->info;
-       unsigned i, num_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1;
-       LLVMValueRef offset;
-
-       if (num_samplers == 0)
-               return;
-
-       /* Load the resources and samplers, we rely on the code sinking to do the rest */
-       for (i = 0; i < num_samplers; ++i) {
-               /* Resource */
-               offset = lp_build_const_int32(gallivm, i);
-               ctx->sampler_views[i] =
-                       load_sampler_desc(ctx, offset, DESC_IMAGE);
-
-               /* FMASK resource */
-               if (info->is_msaa_sampler[i])
-                       ctx->fmasks[i] =
-                               load_sampler_desc(ctx, offset, DESC_FMASK);
-               else {
-                       ctx->sampler_states[i] =
-                               load_sampler_desc(ctx, offset, DESC_SAMPLER);
-                       ctx->sampler_states[i] =
-                               sici_fix_sampler_aniso(ctx, ctx->sampler_views[i],
-                                                      ctx->sampler_states[i]);
-               }
-       }
-}
-
-static void preload_images(struct si_shader_context *ctx)
-{
-       struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
-       struct tgsi_shader_info *info = &ctx->shader->selector->info;
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
-       unsigned num_images = bld_base->info->file_max[TGSI_FILE_IMAGE] + 1;
-       LLVMValueRef res_ptr;
-       unsigned i;
-
-       if (num_images == 0)
-               return;
-
-       res_ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_IMAGES);
-
-       for (i = 0; i < num_images; ++i) {
-               /* Rely on LLVM to shrink the load for buffer resources. */
-               LLVMValueRef rsrc =
-                       build_indexed_load_const(ctx, res_ptr,
-                                                lp_build_const_int32(gallivm, i));
-
-               if (info->images_writemask & (1 << i) &&
-                   !(info->images_buffers & (1 << i)))
-                       rsrc = force_dcc_off(ctx, rsrc);
-
-               ctx->images[i] = rsrc;
-       }
-}
-
 /**
  * Load ESGS and GSVS ring buffer resource descriptors and save the variables
  * for later use.
  */
 static void preload_ring_buffers(struct si_shader_context *ctx)
 {
        struct gallivm_state *gallivm =
                ctx->radeon_bld.soa.bld_base.base.gallivm;

        LLVMValueRef buf_ptr = LLVMGetParam(ctx->radeon_bld.main_fn,
@@ -6773,23 +6685,20 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                ctx.radeon_bld.declare_memory_region = declare_compute_memory;
                break;
        default:
                assert(!"Unsupported shader type");
                return -1;
        }

        create_meta_data(&ctx);
        create_function(&ctx);
        preload_constant_buffers(&ctx);
-       preload_shader_buffers(&ctx);
-       preload_samplers(&ctx);
-       preload_images(&ctx);
        preload_ring_buffers(&ctx);

        if (ctx.is_monolithic && sel->type == PIPE_SHADER_FRAGMENT &&
            shader->key.ps.prolog.poly_stipple) {
                LLVMValueRef list = LLVMGetParam(ctx.radeon_bld.main_fn,
                                                 SI_PARAM_RW_BUFFERS);
                si_llvm_emit_polygon_stipple(&ctx, list,
                                             SI_PARAM_POS_FIXED_PT);
        }


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to