On 10/25/2017 02:20 PM, Alex Smith wrote:
On 25 October 2017 at 12:46, Samuel Pitoiset <samuel.pitoi...@gmail.com <mailto:samuel.pitoi...@gmail.com>> wrote:

    I have something similar on my local tree (started on monday).

    Though, I don't like the way we expose the number of VGPRS/SGPRS
    because we can't really figure out the number of spilled ones.


My assumption was that if we've spilled then we've used all available registers, so if numUsed{V,S}gprs is greater than the number available, then you'd know that the number spilled is the difference between the two. Can we have spilling when num_{v,s}gprs is less than the number available?

Assuming the number of waves per CU is 4, I would go with:

num_available_vgprs = num_physical_vgprs (ie. 256) / max_simd_waves (aligned down to 4).

(or we can just set num_available_vgprs to conf->num_vgprs and return num_used_vgprs = conf->num_vgprs + conf->num_spilled_sgprs).

That way, if num_used_vgprs is greater than num_available_vgprs we know that we are spilling some vgprs.

For the number of available SGPRs, I think we can just hardcode the value to 104 for now.

Also with this, we can easily re-compute the maximum number of waves.


Alex



    On 10/25/2017 01:18 PM, Alex Smith wrote:

        This allows an app to query shader statistics and get a
        disassembly of
        a shader. RenderDoc git has support for it, so this allows you
        to view
        shader disassembly from a capture.

        When this extension is enabled on a device (or when tracing), we now
        disable pipeline caching, since we don't get the shader debug
        info when
        we retrieve cached shaders.

        Signed-off-by: Alex Smith <asm...@feralinteractive.com
        <mailto:asm...@feralinteractive.com>>
        ---
           src/amd/vulkan/radv_device.c         |   9 ++
           src/amd/vulkan/radv_extensions.py    |   1 +
           src/amd/vulkan/radv_pipeline.c       |   2 +-
           src/amd/vulkan/radv_pipeline_cache.c |  11 ++-
           src/amd/vulkan/radv_private.h        |   3 +
           src/amd/vulkan/radv_shader.c         | 163
        ++++++++++++++++++++++++++++-------
           6 files changed, 154 insertions(+), 35 deletions(-)

        diff --git a/src/amd/vulkan/radv_device.c
        b/src/amd/vulkan/radv_device.c
        index c4e25222ea..5603551680 100644
        --- a/src/amd/vulkan/radv_device.c
        +++ b/src/amd/vulkan/radv_device.c
        @@ -943,10 +943,15 @@ VkResult radv_CreateDevice(
                 VkResult result;
                 struct radv_device *device;
           +     bool keep_shader_info = false;
        +
                 for (uint32_t i = 0; i <
        pCreateInfo->enabledExtensionCount; i++) {
                         const char *ext_name =
        pCreateInfo->ppEnabledExtensionNames[i];
                         if
        (!radv_physical_device_extension_supported(physical_device,
        ext_name))
                                 return
        vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
        +
        +               if (strcmp(ext_name,
        VK_AMD_SHADER_INFO_EXTENSION_NAME) == 0)
        +                       keep_shader_info = true;
                 }
                 /* Check enabled features */
        @@ -1040,10 +1045,14 @@ VkResult radv_CreateDevice(
                         device->physical_device->rad_info.max_se >= 2;
                 if (getenv("RADV_TRACE_FILE")) {
        +               keep_shader_info = true;
        +
                         if (!radv_init_trace(device))
                                 goto fail;
                 }
           +     device->keep_shader_info = keep_shader_info;
        +
                 result = radv_device_init_meta(device);
                 if (result != VK_SUCCESS)
                         goto fail;
        diff --git a/src/amd/vulkan/radv_extensions.py
        b/src/amd/vulkan/radv_extensions.py
        index dfeb2880fc..eeb679d65a 100644
        --- a/src/amd/vulkan/radv_extensions.py
        +++ b/src/amd/vulkan/radv_extensions.py
        @@ -81,6 +81,7 @@ EXTENSIONS = [
               Extension('VK_EXT_global_priority',                   1,
        'device->rad_info.has_ctx_priority'),
               Extension('VK_AMD_draw_indirect_count',               1,
        True),
               Extension('VK_AMD_rasterization_order',               1,
        'device->rad_info.chip_class >= VI && device->rad_info.max_se >=
        2'),
        +    Extension('VK_AMD_shader_info',                       1, True),
           ]
             class VkVersion:
        diff --git a/src/amd/vulkan/radv_pipeline.c
        b/src/amd/vulkan/radv_pipeline.c
        index d6b33a5327..2df03a83cf 100644
        --- a/src/amd/vulkan/radv_pipeline.c
        +++ b/src/amd/vulkan/radv_pipeline.c
        @@ -1874,7 +1874,7 @@ void radv_create_shaders(struct
        radv_pipeline *pipeline,
                                 if (device->instance->debug_flags &
        RADV_DEBUG_DUMP_SHADERS)
                                         nir_print_shader(nir[i], stderr);
           -                     if (!pipeline->device->trace_bo)
        +                       if (!pipeline->device->keep_shader_info)
                                         ralloc_free(nir[i]);
                         }
                 }
        diff --git a/src/amd/vulkan/radv_pipeline_cache.c
        b/src/amd/vulkan/radv_pipeline_cache.c
        index 9ba9a3b61b..46198799a7 100644
        --- a/src/amd/vulkan/radv_pipeline_cache.c
        +++ b/src/amd/vulkan/radv_pipeline_cache.c
        @@ -62,9 +62,11 @@ radv_pipeline_cache_init(struct
        radv_pipeline_cache *cache,
                 cache->hash_table = malloc(byte_size);
                 /* We don't consider allocation failure fatal, we just
        start with a 0-sized
        -        * cache. */
        +        * cache. Disable caching when we want to keep shader
        debug info, since
        +        * we don't get the debug info on cached shaders. */
                 if (cache->hash_table == NULL ||
        -           (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
        +           (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
        +           device->keep_shader_info)
                         cache->table_size = 0;
                 else
                         memset(cache->hash_table, 0, byte_size);
        @@ -186,8 +188,11 @@
        radv_create_shader_variants_from_pipeline_cache(struct
        radv_device *device,
                 entry = radv_pipeline_cache_search_unlocked(cache, sha1);
                 if (!entry) {
        +               /* Again, don't cache when we want debug info,
        since this isn't
        +                * present in the cache. */
                         if (!device->physical_device->disk_cache ||
        -                   (device->instance->debug_flags &
        RADV_DEBUG_NO_CACHE)) {
        +                   (device->instance->debug_flags &
        RADV_DEBUG_NO_CACHE) ||
        +                   device->keep_shader_info) {
                                 pthread_mutex_unlock(&cache->mutex);
                                 return false;
                         }
        diff --git a/src/amd/vulkan/radv_private.h
        b/src/amd/vulkan/radv_private.h
        index a4e52b2530..169df5f37b 100644
        --- a/src/amd/vulkan/radv_private.h
        +++ b/src/amd/vulkan/radv_private.h
        @@ -552,6 +552,9 @@ struct radv_device {
                 struct radeon_winsys_bo                      *trace_bo;
                 uint32_t                                     *trace_id_ptr;
           +     /* Whether to keep shader debug info, for tracing or
        VK_AMD_shader_info */
+       bool  keep_shader_info;
        +
        struct radv_physical_device *physical_device;
                 /* Backup in-memory cache to be used if the app doesn't
        provide one */
        diff --git a/src/amd/vulkan/radv_shader.c
        b/src/amd/vulkan/radv_shader.c
        index 5903917068..7f2f0fd750 100644
        --- a/src/amd/vulkan/radv_shader.c
        +++ b/src/amd/vulkan/radv_shader.c
        @@ -46,6 +46,8 @@
           #include "util/debug.h"
           #include "ac_exp_param.h"
           +#include "util/string_buffer.h"
        +
           static const struct nir_shader_compiler_options nir_options = {
                 .vertex_id_zero_based = true,
                 .lower_scmp = true,
        @@ -471,7 +473,7 @@ shader_variant_create(struct radv_device
        *device,
                 free(binary.relocs);
                 variant->ref_count = 1;
           -     if (device->trace_bo) {
        +       if (device->keep_shader_info) {
                         variant->disasm_string = binary.disasm_string;
                         if (!gs_copy_shader && !module->nir) {
                                 variant->nir = *shaders;
        @@ -593,11 +595,20 @@ radv_get_shader_name(struct
        radv_shader_variant *var, gl_shader_stage stage)
                 };
           }
           -void
        -radv_shader_dump_stats(struct radv_device *device,
        -                      struct radv_shader_variant *variant,
        -                      gl_shader_stage stage,
        -                      FILE *file)
        +static uint32_t
        +get_total_sgprs(struct radv_device *device)
        +{
        +       if (device->physical_device->rad_info.chip_class >= VI)
        +               return 800;
        +       else
        +               return 512;
        +}
        +
        +static void
        +generate_shader_stats(struct radv_device *device,
        +                     struct radv_shader_variant *variant,
        +                     gl_shader_stage stage,
        +                     struct _mesa_string_buffer *buf)
           {
                 unsigned lds_increment =
        device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
                 struct ac_shader_config *conf;
        @@ -623,12 +634,8 @@ radv_shader_dump_stats(struct radv_device
        *device,
                                              lds_increment);
                 }
           -     if (conf->num_sgprs) {
        -               if (device->physical_device->rad_info.chip_class
         >= VI)
        -                       max_simd_waves = MIN2(max_simd_waves,
        800 / conf->num_sgprs);
        -               else
        -                       max_simd_waves = MIN2(max_simd_waves,
        512 / conf->num_sgprs);
        -       }
        +       if (conf->num_sgprs)
        +               max_simd_waves = MIN2(max_simd_waves,
        get_total_sgprs(device) / conf->num_sgprs);
                 if (conf->num_vgprs)
                         max_simd_waves = MIN2(max_simd_waves, 256 /
        conf->num_vgprs);
        @@ -639,27 +646,121 @@ radv_shader_dump_stats(struct radv_device
        *device,
                 if (lds_per_wave)
                         max_simd_waves = MIN2(max_simd_waves, 16384 /
        lds_per_wave);
           +     if (stage == MESA_SHADER_FRAGMENT) {
        +               _mesa_string_buffer_printf(buf, "*** SHADER
        CONFIG ***\n"
        +                                          "SPI_PS_INPUT_ADDR =
        0x%04x\n"
        +                                          "SPI_PS_INPUT_ENA  =
        0x%04x\n",
+ conf->spi_ps_input_addr, conf->spi_ps_input_ena);
        +       }
        +
        +       _mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n"
        +                                  "SGPRS: %d\n"
        +                                  "VGPRS: %d\n"
        +                                  "Spilled SGPRs: %d\n"
        +                                  "Spilled VGPRs: %d\n"
        +                                  "Code Size: %d bytes\n"
        +                                  "LDS: %d blocks\n"
        +                                  "Scratch: %d bytes per wave\n"
        +                                  "Max Waves: %d\n"
        +                                  "********************\n\n\n",
        +                                  conf->num_sgprs, conf->num_vgprs,
        +                                  conf->spilled_sgprs,
        conf->spilled_vgprs, variant->code_size,
        +                                  conf->lds_size,
        conf->scratch_bytes_per_wave,
        +                                  max_simd_waves);
        +}
        +
        +void
        +radv_shader_dump_stats(struct radv_device *device,
        +                      struct radv_shader_variant *variant,
        +                      gl_shader_stage stage,
        +                      FILE *file)
        +{
        +       struct _mesa_string_buffer *buf =
        _mesa_string_buffer_create(NULL, 256);
        +
        +       generate_shader_stats(device, variant, stage, buf);
        +
                 fprintf(file, "\n%s:\n", radv_get_shader_name(variant,
        stage));
        +       fprintf(file, buf->buf);
           -     if (stage == MESA_SHADER_FRAGMENT) {
        -               fprintf(file, "*** SHADER CONFIG ***\n"
        -                       "SPI_PS_INPUT_ADDR = 0x%04x\n"
        -                       "SPI_PS_INPUT_ENA  = 0x%04x\n",
        -                       conf->spi_ps_input_addr,
        conf->spi_ps_input_ena);
        +       _mesa_string_buffer_destroy(buf);
        +}
        +
        +VkResult
        +radv_GetShaderInfoAMD(VkDevice _device,
        +                     VkPipeline _pipeline,
        +                     VkShaderStageFlagBits shaderStage,
        +                     VkShaderInfoTypeAMD infoType,
        +                     size_t* pInfoSize,
        +                     void* pInfo)
        +{
        +       RADV_FROM_HANDLE(radv_device, device, _device);
        +       RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
        +       gl_shader_stage stage =
        vk_to_mesa_shader_stage(shaderStage);
        +       struct radv_shader_variant *variant =
        pipeline->shaders[stage];
        +       struct _mesa_string_buffer *buf;
        +       VkResult result = VK_SUCCESS;
        +
        +       /* Spec doesn't indicate what to do if the stage is
        invalid, so just
        +        * return no info for this. */
        +       if (!variant)
        +               return VK_ERROR_FEATURE_NOT_PRESENT;
        +
        +       switch (infoType) {
        +       case VK_SHADER_INFO_TYPE_STATISTICS_AMD:
        +               if (!pInfo) {
        +                       *pInfoSize =
        sizeof(VkShaderStatisticsInfoAMD);
        +               } else {
        +                       struct ac_shader_config *conf =
        &variant->config;
        +
        +                       VkShaderStatisticsInfoAMD statistics = {};
        +                       statistics.shaderStageMask = shaderStage;
        +                       statistics.resourceUsage.numUsedVgprs =
        conf->num_vgprs + conf->spilled_vgprs;
        +                       statistics.resourceUsage.numUsedSgprs =
        conf->num_sgprs + conf->spilled_sgprs;
+  statistics.resourceUsage.ldsSizePerLocalWorkGroup = 16384; +  statistics.resourceUsage.ldsUsageSizeInBytes = conf->lds_size; +  statistics.resourceUsage.scratchMemUsageInBytes =
        conf->scratch_bytes_per_wave;
        +                       statistics.numPhysicalVgprs =
        statistics.numAvailableVgprs = 256;
        +                       statistics.numPhysicalSgprs =
        statistics.numAvailableSgprs = get_total_sgprs(device);
        +                       statistics.computeWorkGroupSize[0] =
        variant->nir->info.cs.local_size[0];
        +                       statistics.computeWorkGroupSize[1] =
        variant->nir->info.cs.local_size[1];
        +                       statistics.computeWorkGroupSize[2] =
        variant->nir->info.cs.local_size[2];
        +
        +                       size_t size = *pInfoSize;
        +                       *pInfoSize = sizeof(statistics);
        +
        +                       memcpy(pInfo, &statistics, MIN2(size,
        *pInfoSize));
        +
        +                       if (size < *pInfoSize)
        +                               result = VK_INCOMPLETE;
        +               }
        +
        +               break;
        +       case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD:
        +               buf = _mesa_string_buffer_create(NULL, 1024);
        +
        +               _mesa_string_buffer_printf(buf, "%s:\n",
        radv_get_shader_name(variant, stage));
        +               _mesa_string_buffer_printf(buf, "%s\n\n",
        variant->disasm_string);
        +               generate_shader_stats(device, variant, stage, buf);
        +
        +               if (!pInfo) {
        +                       *pInfoSize = buf->length;
        +               } else {
        +                       size_t size = *pInfoSize;
        +                       *pInfoSize = buf->length;
        +
        +                       memcpy(pInfo, buf->buf, MIN2(size,
        buf->length));
        +
        +                       if (size < buf->length)
        +                               result = VK_INCOMPLETE;
        +               }
        +
        +               _mesa_string_buffer_destroy(buf);
        +               break;
        +       default:
        +               /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented
        for now. */
        +               result = VK_ERROR_FEATURE_NOT_PRESENT;
        +               break;
                 }
           -     fprintf(file, "*** SHADER STATS ***\n"
        -               "SGPRS: %d\n"
        -               "VGPRS: %d\n"
        -               "Spilled SGPRs: %d\n"
        -               "Spilled VGPRs: %d\n"
        -               "Code Size: %d bytes\n"
        -               "LDS: %d blocks\n"
        -               "Scratch: %d bytes per wave\n"
        -               "Max Waves: %d\n"
        -               "********************\n\n\n",
        -               conf->num_sgprs, conf->num_vgprs,
        -               conf->spilled_sgprs, conf->spilled_vgprs,
        variant->code_size,
        -               conf->lds_size, conf->scratch_bytes_per_wave,
        -               max_simd_waves);
        +       return result;
           }


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to