From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_shader.c | 34 +++++++++++++++++++++-----------
 src/gallium/drivers/radeonsi/si_shader.h |  1 +
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index b18b4f6..f1ac94f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5353,47 +5353,42 @@ static void si_shader_dump_disassembly(const struct ac_shader_binary *binary,
        } else {
                fprintf(file, "Shader %s binary:\n", name);
                for (i = 0; i < binary->code_size; i += 4) {
                        fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i,
                                binary->code[i + 3], binary->code[i + 2],
                                binary->code[i + 1], binary->code[i]);
                }
        }
 }
 
-static void si_shader_dump_stats(struct si_screen *sscreen,
-                                const struct si_shader *shader,
-                                struct pipe_debug_callback *debug,
-                                unsigned processor,
-                                FILE *file,
-                                bool check_debug_option)
+static void si_calculate_max_simd_waves(struct si_shader *shader)
 {
-       const struct si_shader_config *conf = &shader->config;
-       unsigned num_inputs = shader->selector ? shader->selector->info.num_inputs : 0;
-       unsigned code_size = si_get_shader_binary_size(shader);
+       struct si_screen *sscreen = shader->selector->screen;
+       struct si_shader_config *conf = &shader->config;
+       unsigned num_inputs = shader->selector->info.num_inputs;
        unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 : 256;
        unsigned lds_per_wave = 0;
        unsigned max_simd_waves;
 
        switch (sscreen->info.family) {
        /* These always have 8 waves: */
        case CHIP_POLARIS10:
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                max_simd_waves = 8;
                break;
        default:
                max_simd_waves = 10;
        }
 
        /* Compute LDS usage for PS. */
-       switch (processor) {
+       switch (shader->selector->type) {
        case PIPE_SHADER_FRAGMENT:
                /* The minimum usage per wave is (num_inputs * 48). The maximum
                 * usage is (num_inputs * 48 * 16).
                 * We can get anything in between and it varies between waves.
                 *
                 * The 48 bytes per input for a single primitive is equal to
                 * 4 bytes/component * 4 components/input * 3 points.
                 *
                 * Other stages don't know the size at compile time or don't
                 * allocate LDS per wave, but instead they do it per thread group.
@@ -5420,20 +5415,33 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
        }
 
        if (conf->num_vgprs)
                max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
 
        /* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage above
         * 16KB makes some SIMDs unoccupied). */
        if (lds_per_wave)
                max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
 
+       conf->max_simd_waves = max_simd_waves;
+}
+
+static void si_shader_dump_stats(struct si_screen *sscreen,
+                                const struct si_shader *shader,
+                                struct pipe_debug_callback *debug,
+                                unsigned processor,
+                                FILE *file,
+                                bool check_debug_option)
+{
+       const struct si_shader_config *conf = &shader->config;
+       unsigned code_size = si_get_shader_binary_size(shader);
+
        if (!check_debug_option ||
            si_can_dump_shader(sscreen, processor)) {
                if (processor == PIPE_SHADER_FRAGMENT) {
                        fprintf(file, "*** SHADER CONFIG ***\n"
                                "SPI_PS_INPUT_ADDR = 0x%04x\n"
                                "SPI_PS_INPUT_ENA  = 0x%04x\n",
                                conf->spi_ps_input_addr, conf->spi_ps_input_ena);
                }
 
                fprintf(file, "*** SHADER STATS ***\n"
@@ -5444,30 +5452,30 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
                        "Private memory VGPRs: %d\n"
                        "Code Size: %d bytes\n"
                        "LDS: %d blocks\n"
                        "Scratch: %d bytes per wave\n"
                        "Max Waves: %d\n"
                        "********************\n\n\n",
                        conf->num_sgprs, conf->num_vgprs,
                        conf->spilled_sgprs, conf->spilled_vgprs,
                        conf->private_mem_vgprs, code_size,
                        conf->lds_size, conf->scratch_bytes_per_wave,
-                       max_simd_waves);
+                       conf->max_simd_waves);
        }
 
        pipe_debug_message(debug, SHADER_INFO,
                           "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d "
                           "LDS: %d Scratch: %d Max Waves: %d Spilled SGPRs: %d "
                           "Spilled VGPRs: %d PrivMem VGPRs: %d",
                           conf->num_sgprs, conf->num_vgprs, code_size,
                           conf->lds_size, conf->scratch_bytes_per_wave,
-                          max_simd_waves, conf->spilled_sgprs,
+                          conf->max_simd_waves, conf->spilled_sgprs,
                           conf->spilled_vgprs, conf->private_mem_vgprs);
 }
 
 const char *si_get_shader_name(const struct si_shader *shader, unsigned processor)
 {
        switch (processor) {
        case PIPE_SHADER_VERTEX:
                if (shader->key.as_es)
                        return "Vertex Shader as ES";
                else if (shader->key.as_ls)
@@ -6960,20 +6968,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr)) {
                        shader->info.ancillary_vgpr_index = shader->info.num_input_vgprs;
                        shader->info.num_input_vgprs += 1;
                }
                if (G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr))
                        shader->info.num_input_vgprs += 1;
                if (G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr))
                        shader->info.num_input_vgprs += 1;
        }
 
+       si_calculate_max_simd_waves(shader);
        return 0;
 }
 
 /**
  * Create, compile and return a shader part (prolog or epilog).
  *
  * \param sscreen      screen
  * \param list         list of shader parts of the same category
  * \param type         shader type
  * \param key          shader part key
@@ -8033,20 +8042,21 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                                                        shader->prolog2->config.num_sgprs);
                        shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
                                                        shader->prolog2->config.num_vgprs);
                }
                if (shader->epilog) {
                        shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
                                                        shader->epilog->config.num_sgprs);
                        shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
                                                        shader->epilog->config.num_vgprs);
                }
+               si_calculate_max_simd_waves(shader);
        }
 
        si_fix_resource_usage(sscreen, shader);
        si_shader_dump(sscreen, shader, debug, sel->info.processor,
                       stderr, true);
 
        /* Upload. */
        r = si_shader_binary_upload(sscreen, shader);
        if (r) {
                fprintf(stderr, "LLVM failed to upload shader\n");
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index c449aa9..6ed1646 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -550,20 +550,21 @@ struct si_shader_key {
 /* Restore the pack alignment to default. */
 #pragma pack(pop)
 
 struct si_shader_config {
        unsigned                        num_sgprs;
        unsigned                        num_vgprs;
        unsigned                        spilled_sgprs;
        unsigned                        spilled_vgprs;
        unsigned                        private_mem_vgprs;
        unsigned                        lds_size;
+       unsigned                        max_simd_waves;
        unsigned                        spi_ps_input_ena;
        unsigned                        spi_ps_input_addr;
        unsigned                        float_mode;
        unsigned                        scratch_bytes_per_wave;
        unsigned                        rsrc1;
        unsigned                        rsrc2;
 };
 
 /* GCN-specific shader info. */
 struct si_shader_info {
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to