From: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/gallium/drivers/radeon/r600_pipe_common.c | 431 ----------------------- src/gallium/drivers/radeon/r600_pipe_common.h | 4 - src/gallium/drivers/radeonsi/si_pipe.c | 432 ++++++++++++++++++++++++ src/gallium/drivers/radeonsi/si_pipe.h | 4 + src/gallium/drivers/radeonsi/si_state_shaders.c | 14 +- 5 files changed, 443 insertions(+), 442 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index ce612113c51..036f380b0b3 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -29,21 +29,20 @@ #include "util/u_memory.h" #include "util/u_format_s3tc.h" #include "util/u_upload_mgr.h" #include "util/os_time.h" #include "vl/vl_decoder.h" #include "vl/vl_video_buffer.h" #include "radeon/radeon_video.h" #include "amd/common/ac_llvm_util.h" #include "amd/common/sid.h" #include <inttypes.h> -#include <sys/utsname.h> #include <llvm-c/TargetMachine.h> /* * shader binary helpers. */ void si_radeon_shader_binary_init(struct ac_shader_binary *b) { memset(b, 0, sizeof(*b)); @@ -632,139 +631,20 @@ static const struct debug_named_value common_debug_options[] = { { "nodpbb", DBG(NO_DPBB), "Disable DPBB." }, { "nodfsm", DBG(NO_DFSM), "Disable DFSM." }, { "dpbb", DBG(DPBB), "Enable DPBB." }, { "dfsm", DBG(DFSM), "Enable DFSM." }, { "nooutoforder", DBG(NO_OUT_OF_ORDER), "Disable out-of-order rasterization" }, { "reserve_vmid", DBG(RESERVE_VMID), "Force VMID reservation per context." 
}, DEBUG_NAMED_VALUE_END /* must be last */ }; -static const char* r600_get_vendor(struct pipe_screen* pscreen) -{ - return "X.Org"; -} - -static const char* r600_get_device_vendor(struct pipe_screen* pscreen) -{ - return "AMD"; -} - -static const char *r600_get_marketing_name(struct radeon_winsys *ws) -{ - if (!ws->get_chip_name) - return NULL; - return ws->get_chip_name(ws); -} - -static const char *r600_get_family_name(const struct r600_common_screen *rscreen) -{ - switch (rscreen->info.family) { - case CHIP_TAHITI: return "AMD TAHITI"; - case CHIP_PITCAIRN: return "AMD PITCAIRN"; - case CHIP_VERDE: return "AMD CAPE VERDE"; - case CHIP_OLAND: return "AMD OLAND"; - case CHIP_HAINAN: return "AMD HAINAN"; - case CHIP_BONAIRE: return "AMD BONAIRE"; - case CHIP_KAVERI: return "AMD KAVERI"; - case CHIP_KABINI: return "AMD KABINI"; - case CHIP_HAWAII: return "AMD HAWAII"; - case CHIP_MULLINS: return "AMD MULLINS"; - case CHIP_TONGA: return "AMD TONGA"; - case CHIP_ICELAND: return "AMD ICELAND"; - case CHIP_CARRIZO: return "AMD CARRIZO"; - case CHIP_FIJI: return "AMD FIJI"; - case CHIP_POLARIS10: return "AMD POLARIS10"; - case CHIP_POLARIS11: return "AMD POLARIS11"; - case CHIP_POLARIS12: return "AMD POLARIS12"; - case CHIP_STONEY: return "AMD STONEY"; - case CHIP_VEGA10: return "AMD VEGA10"; - case CHIP_RAVEN: return "AMD RAVEN"; - default: return "AMD unknown"; - } -} - -static void r600_disk_cache_create(struct r600_common_screen *rscreen) -{ - /* Don't use the cache if shader dumping is enabled. 
*/ - if (rscreen->debug_flags & DBG_ALL_SHADERS) - return; - - /* TODO: remove this once gallium supports a nir cache */ - if (rscreen->debug_flags & DBG(NIR)) - return; - - uint32_t mesa_timestamp; - if (disk_cache_get_function_timestamp(r600_disk_cache_create, - &mesa_timestamp)) { - char *timestamp_str; - int res = -1; - uint32_t llvm_timestamp; - - if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, - &llvm_timestamp)) { - res = asprintf(&timestamp_str, "%u_%u", - mesa_timestamp, llvm_timestamp); - } - - if (res != -1) { - /* These flags affect shader compilation. */ - uint64_t shader_debug_flags = - rscreen->debug_flags & - (DBG(FS_CORRECT_DERIVS_AFTER_KILL) | - DBG(SI_SCHED) | - DBG(UNSAFE_MATH)); - - rscreen->disk_shader_cache = - disk_cache_create(r600_get_family_name(rscreen), - timestamp_str, - shader_debug_flags); - free(timestamp_str); - } - } -} - -static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen *pscreen) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen; - return rscreen->disk_shader_cache; -} - -static const char* r600_get_name(struct pipe_screen* pscreen) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen; - - return rscreen->renderer_string; -} - -static float r600_get_paramf(struct pipe_screen* pscreen, - enum pipe_capf param) -{ - switch (param) { - case PIPE_CAPF_MAX_LINE_WIDTH: - case PIPE_CAPF_MAX_LINE_WIDTH_AA: - case PIPE_CAPF_MAX_POINT_WIDTH: - case PIPE_CAPF_MAX_POINT_WIDTH_AA: - return 8192.0f; - case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: - return 16.0f; - case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: - return 16.0f; - case PIPE_CAPF_GUARD_BAND_LEFT: - case PIPE_CAPF_GUARD_BAND_TOP: - case PIPE_CAPF_GUARD_BAND_RIGHT: - case PIPE_CAPF_GUARD_BAND_BOTTOM: - return 0.0f; - } - return 0.0f; -} - static int r600_get_video_param(struct pipe_screen *screen, enum pipe_video_profile profile, enum pipe_video_entrypoint entrypoint, enum pipe_video_cap param) { switch 
(param) { case PIPE_VIDEO_CAP_SUPPORTED: return vl_profile_supported(screen, profile, entrypoint); case PIPE_VIDEO_CAP_NPOT_TEXTURES: return 1; @@ -779,390 +659,79 @@ static int r600_get_video_param(struct pipe_screen *screen, return false; case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: return true; case PIPE_VIDEO_CAP_MAX_LEVEL: return vl_level_supported(screen, profile); default: return 0; } } -static unsigned get_max_threads_per_block(struct r600_common_screen *screen, - enum pipe_shader_ir ir_type) -{ - if (ir_type != PIPE_SHADER_IR_TGSI) - return 256; - - /* Only 16 waves per thread-group on gfx9. */ - if (screen->chip_class >= GFX9) - return 1024; - - /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice - * round number. - */ - return 2048; -} - -static int r600_get_compute_param(struct pipe_screen *screen, - enum pipe_shader_ir ir_type, - enum pipe_compute_cap param, - void *ret) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; - - //TODO: select these params by asic - switch (param) { - case PIPE_COMPUTE_CAP_IR_TARGET: { - const char *gpu; - const char *triple; - - if (HAVE_LLVM < 0x0400) - triple = "amdgcn--"; - else - triple = "amdgcn-mesa-mesa3d"; - - gpu = ac_get_llvm_processor_name(rscreen->family); - if (ret) { - sprintf(ret, "%s-%s", gpu, triple); - } - /* +2 for dash and terminating NIL byte */ - return (strlen(triple) + strlen(gpu) + 2) * sizeof(char); - } - case PIPE_COMPUTE_CAP_GRID_DIMENSION: - if (ret) { - uint64_t *grid_dimension = ret; - grid_dimension[0] = 3; - } - return 1 * sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: - if (ret) { - uint64_t *grid_size = ret; - grid_size[0] = 65535; - grid_size[1] = 65535; - grid_size[2] = 65535; - } - return 3 * sizeof(uint64_t) ; - - case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: - if (ret) { - uint64_t *block_size = ret; - unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type); - block_size[0] = threads_per_block; - block_size[1] = 
threads_per_block; - block_size[2] = threads_per_block; - } - return 3 * sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: - if (ret) { - uint64_t *max_threads_per_block = ret; - *max_threads_per_block = get_max_threads_per_block(rscreen, ir_type); - } - return sizeof(uint64_t); - case PIPE_COMPUTE_CAP_ADDRESS_BITS: - if (ret) { - uint32_t *address_bits = ret; - address_bits[0] = 64; - } - return 1 * sizeof(uint32_t); - - case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: - if (ret) { - uint64_t *max_global_size = ret; - uint64_t max_mem_alloc_size; - - r600_get_compute_param(screen, ir_type, - PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, - &max_mem_alloc_size); - - /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least - * 1/4 of the MAX_GLOBAL_SIZE. Since the - * MAX_MEM_ALLOC_SIZE is fixed for older kernels, - * make sure we never report more than - * 4 * MAX_MEM_ALLOC_SIZE. - */ - *max_global_size = MIN2(4 * max_mem_alloc_size, - MAX2(rscreen->info.gart_size, - rscreen->info.vram_size)); - } - return sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: - if (ret) { - uint64_t *max_local_size = ret; - /* Value reported by the closed source driver. */ - *max_local_size = 32768; - } - return sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: - if (ret) { - uint64_t *max_input_size = ret; - /* Value reported by the closed source driver. 
*/ - *max_input_size = 1024; - } - return sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: - if (ret) { - uint64_t *max_mem_alloc_size = ret; - - *max_mem_alloc_size = rscreen->info.max_alloc_size; - } - return sizeof(uint64_t); - - case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: - if (ret) { - uint32_t *max_clock_frequency = ret; - *max_clock_frequency = rscreen->info.max_shader_clock; - } - return sizeof(uint32_t); - - case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: - if (ret) { - uint32_t *max_compute_units = ret; - *max_compute_units = rscreen->info.num_good_compute_units; - } - return sizeof(uint32_t); - - case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: - if (ret) { - uint32_t *images_supported = ret; - *images_supported = 0; - } - return sizeof(uint32_t); - case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: - break; /* unused */ - case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: - if (ret) { - uint32_t *subgroup_size = ret; - *subgroup_size = 64; - } - return sizeof(uint32_t); - case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: - if (ret) { - uint64_t *max_variable_threads_per_block = ret; - if (ir_type == PIPE_SHADER_IR_TGSI) - *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK; - else - *max_variable_threads_per_block = 0; - } - return sizeof(uint64_t); - } - - fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); - return 0; -} - -static uint64_t r600_get_timestamp(struct pipe_screen *screen) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - - return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) / - rscreen->info.clock_crystal_freq; -} - -static void r600_query_memory_info(struct pipe_screen *screen, - struct pipe_memory_info *info) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct radeon_winsys *ws = rscreen->ws; - unsigned vram_usage, gtt_usage; - - info->total_device_memory = rscreen->info.vram_size / 1024; - info->total_staging_memory = rscreen->info.gart_size / 1024; - 
- /* The real TTM memory usage is somewhat random, because: - * - * 1) TTM delays freeing memory, because it can only free it after - * fences expire. - * - * 2) The memory usage can be really low if big VRAM evictions are - * taking place, but the real usage is well above the size of VRAM. - * - * Instead, return statistics of this process. - */ - vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024; - gtt_usage = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024; - - info->avail_device_memory = - vram_usage <= info->total_device_memory ? - info->total_device_memory - vram_usage : 0; - info->avail_staging_memory = - gtt_usage <= info->total_staging_memory ? - info->total_staging_memory - gtt_usage : 0; - - info->device_memory_evicted = - ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024; - - if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4) - info->nr_device_memory_evictions = - ws->query_value(ws, RADEON_NUM_EVICTIONS); - else - /* Just return the number of evicted 64KB pages. 
*/ - info->nr_device_memory_evictions = info->device_memory_evicted / 64; -} - struct pipe_resource *si_resource_create_common(struct pipe_screen *screen, const struct pipe_resource *templ) { if (templ->target == PIPE_BUFFER) { return si_buffer_create(screen, templ, 256); } else { return si_texture_create(screen, templ); } } bool si_common_screen_init(struct r600_common_screen *rscreen, struct radeon_winsys *ws) { - char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {}; - struct utsname uname_data; - const char *chip_name; - - ws->query_info(ws, &rscreen->info); - rscreen->ws = ws; - - if ((chip_name = r600_get_marketing_name(ws))) - snprintf(family_name, sizeof(family_name), "%s / ", - r600_get_family_name(rscreen) + 4); - else - chip_name = r600_get_family_name(rscreen); - - if (uname(&uname_data) == 0) - snprintf(kernel_version, sizeof(kernel_version), - " / %s", uname_data.release); - - if (HAVE_LLVM > 0) { - snprintf(llvm_string, sizeof(llvm_string), - ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff, - HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH); - } - - snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string), - "%s (%sDRM %i.%i.%i%s%s)", - chip_name, family_name, rscreen->info.drm_major, - rscreen->info.drm_minor, rscreen->info.drm_patchlevel, - kernel_version, llvm_string); - - rscreen->b.get_name = r600_get_name; - rscreen->b.get_vendor = r600_get_vendor; - rscreen->b.get_device_vendor = r600_get_device_vendor; - rscreen->b.get_disk_shader_cache = r600_get_disk_shader_cache; - rscreen->b.get_compute_param = r600_get_compute_param; - rscreen->b.get_paramf = r600_get_paramf; - rscreen->b.get_timestamp = r600_get_timestamp; rscreen->b.resource_destroy = u_resource_destroy_vtbl; rscreen->b.resource_from_user_memory = si_buffer_from_user_memory; - rscreen->b.query_memory_info = r600_query_memory_info; if (rscreen->info.has_hw_decode) { rscreen->b.get_video_param = si_vid_get_video_param; rscreen->b.is_video_format_supported = 
si_vid_is_format_supported; } else { rscreen->b.get_video_param = r600_get_video_param; rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported; } si_init_screen_texture_functions(rscreen); si_init_screen_query_functions(rscreen); rscreen->family = rscreen->info.family; rscreen->chip_class = rscreen->info.chip_class; rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0); rscreen->has_rbplus = false; rscreen->rbplus_allowed = false; - r600_disk_cache_create(rscreen); - slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64); rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1)); if (rscreen->force_aniso >= 0) { printf("radeon: Forcing anisotropy filter to %ix\n", /* round down to a power of two */ 1 << util_logbase2(rscreen->force_aniso)); } (void) mtx_init(&rscreen->aux_context_lock, mtx_plain); (void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain); - if (rscreen->debug_flags & DBG(INFO)) { - printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", - rscreen->info.pci_domain, rscreen->info.pci_bus, - rscreen->info.pci_dev, rscreen->info.pci_func); - printf("pci_id = 0x%x\n", rscreen->info.pci_id); - printf("family = %i (%s)\n", rscreen->info.family, - r600_get_family_name(rscreen)); - printf("chip_class = %i\n", rscreen->info.chip_class); - printf("pte_fragment_size = %u\n", rscreen->info.pte_fragment_size); - printf("gart_page_size = %u\n", rscreen->info.gart_page_size); - printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024)); - printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024)); - printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024)); - printf("max_alloc_size = %i MB\n", - (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024)); - printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size); - printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram); 
- printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory); - printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2); - printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode); - printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings); - printf("num_compute_rings = %u\n", rscreen->info.num_compute_rings); - printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version); - printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version); - printf("me_fw_version = %i\n", rscreen->info.me_fw_version); - printf("me_fw_feature = %i\n", rscreen->info.me_fw_feature); - printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version); - printf("pfp_fw_feature = %i\n", rscreen->info.pfp_fw_feature); - printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version); - printf("ce_fw_feature = %i\n", rscreen->info.ce_fw_feature); - printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config); - printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq); - printf("tcc_cache_line_size = %u\n", rscreen->info.tcc_cache_line_size); - printf("drm = %i.%i.%i\n", rscreen->info.drm_major, - rscreen->info.drm_minor, rscreen->info.drm_patchlevel); - printf("has_userptr = %i\n", rscreen->info.has_userptr); - printf("has_syncobj = %u\n", rscreen->info.has_syncobj); - printf("has_sync_file = %u\n", rscreen->info.has_sync_file); - - printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes); - printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock); - printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units); - printf("max_se = %i\n", rscreen->info.max_se); - printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se); - - printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map); - printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid); - printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks); - printf("num_render_backends = %i\n", 
rscreen->info.num_render_backends); - printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes); - printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes); - printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask); - printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment); - } return true; } void si_destroy_common_screen(struct r600_common_screen *rscreen) { si_perfcounters_destroy(rscreen); si_gpu_load_kill_thread(rscreen); mtx_destroy(&rscreen->gpu_load_mutex); mtx_destroy(&rscreen->aux_context_lock); rscreen->aux_context->destroy(rscreen->aux_context); slab_destroy_parent(&rscreen->pool_transfers); - disk_cache_destroy(rscreen->disk_shader_cache); rscreen->ws->destroy(rscreen->ws); FREE(rscreen); } bool si_can_dump_shader(struct r600_common_screen *rscreen, unsigned processor) { return rscreen->debug_flags & (1 << processor); } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index adfcc7c8a70..4b80d188fba 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -387,22 +387,20 @@ struct r600_memory_object { struct r600_common_screen { struct pipe_screen b; struct radeon_winsys *ws; enum radeon_family family; enum chip_class chip_class; struct radeon_info info; uint64_t debug_flags; bool has_rbplus; /* if RB+ registers exist */ bool rbplus_allowed; /* if RB+ is allowed */ - struct disk_cache *disk_shader_cache; - struct slab_parent_pool pool_transfers; /* Texture filter settings. */ int force_aniso; /* -1 = disabled */ /* Auxiliary context. Mainly used to initialize resources. * It must be locked prior to using and flushed before unlocking. */ struct pipe_context *aux_context; mtx_t aux_context_lock; @@ -415,22 +413,20 @@ struct r600_common_screen { */ unsigned num_shaders_created; unsigned num_shader_cache_hits; /* GPU load thread. 
*/ mtx_t gpu_load_mutex; thrd_t gpu_load_thread; union r600_mmio_counters mmio_counters; volatile unsigned gpu_load_stop_thread; /* bool */ - char renderer_string[100]; - /* Performance counters. */ struct r600_perfcounters *perfcounters; /* If pipe_screen wants to recompute and re-emit the framebuffer, * sampler, and image states of all contexts, it should atomically * increment this. * * Each context will compare this with its own last known value of * the counter before drawing and re-emit the states accordingly. */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index b3d8ae508bd..b38c55619f7 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -31,20 +31,22 @@ #include "util/u_log.h" #include "util/u_memory.h" #include "util/u_suballoc.h" #include "util/u_tests.h" #include "util/xmlconfig.h" #include "vl/vl_decoder.h" #include "../ddebug/dd_util.h" #include "compiler/nir/nir.h" +#include <sys/utsname.h> + /* * pipe_context */ static void si_destroy_context(struct pipe_context *context) { struct si_context *sctx = (struct si_context *)context; int i; /* Unreference the framebuffer normally to disable related logic * properly. @@ -394,20 +396,306 @@ static struct pipe_context *si_pipe_create_context(struct pipe_screen *screen, * implementation for fence_server_sync is incomplete. */ return threaded_context_create(ctx, &sscreen->b.pool_transfers, si_replace_buffer_storage, sscreen->b.info.drm_major >= 3 ? 
si_create_fence : NULL, &((struct si_context*)ctx)->b.tc); } /* * pipe_screen */ +static const char* si_get_vendor(struct pipe_screen* pscreen) +{ + return "X.Org"; +} + +static const char* si_get_device_vendor(struct pipe_screen* pscreen) +{ + return "AMD"; +} + +static const char *si_get_marketing_name(struct radeon_winsys *ws) +{ + if (!ws->get_chip_name) + return NULL; + return ws->get_chip_name(ws); +} + +static const char *si_get_family_name(const struct si_screen *screen) +{ + switch (screen->b.info.family) { + case CHIP_TAHITI: return "AMD TAHITI"; + case CHIP_PITCAIRN: return "AMD PITCAIRN"; + case CHIP_VERDE: return "AMD CAPE VERDE"; + case CHIP_OLAND: return "AMD OLAND"; + case CHIP_HAINAN: return "AMD HAINAN"; + case CHIP_BONAIRE: return "AMD BONAIRE"; + case CHIP_KAVERI: return "AMD KAVERI"; + case CHIP_KABINI: return "AMD KABINI"; + case CHIP_HAWAII: return "AMD HAWAII"; + case CHIP_MULLINS: return "AMD MULLINS"; + case CHIP_TONGA: return "AMD TONGA"; + case CHIP_ICELAND: return "AMD ICELAND"; + case CHIP_CARRIZO: return "AMD CARRIZO"; + case CHIP_FIJI: return "AMD FIJI"; + case CHIP_POLARIS10: return "AMD POLARIS10"; + case CHIP_POLARIS11: return "AMD POLARIS11"; + case CHIP_POLARIS12: return "AMD POLARIS12"; + case CHIP_STONEY: return "AMD STONEY"; + case CHIP_VEGA10: return "AMD VEGA10"; + case CHIP_RAVEN: return "AMD RAVEN"; + default: return "AMD unknown"; + } +} + +static void si_disk_cache_create(struct si_screen *screen) +{ + /* Don't use the cache if shader dumping is enabled. 
*/ + if (screen->b.debug_flags & DBG_ALL_SHADERS) + return; + + /* TODO: remove this once gallium supports a nir cache */ + if (screen->b.debug_flags & DBG(NIR)) + return; + + uint32_t mesa_timestamp; + if (disk_cache_get_function_timestamp(si_disk_cache_create, + &mesa_timestamp)) { + char *timestamp_str; + int res = -1; + uint32_t llvm_timestamp; + + if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, + &llvm_timestamp)) { + res = asprintf(&timestamp_str, "%u_%u", + mesa_timestamp, llvm_timestamp); + } + + if (res != -1) { + /* These flags affect shader compilation. */ + uint64_t shader_debug_flags = + screen->b.debug_flags & + (DBG(FS_CORRECT_DERIVS_AFTER_KILL) | + DBG(SI_SCHED) | + DBG(UNSAFE_MATH)); + + screen->disk_shader_cache = + disk_cache_create(si_get_family_name(screen), + timestamp_str, + shader_debug_flags); + free(timestamp_str); + } + } +} + +static struct disk_cache *si_get_disk_shader_cache(struct pipe_screen *pscreen) +{ + struct si_screen *sscreen = (struct si_screen*)pscreen; + return sscreen->disk_shader_cache; +} + +static const char* si_get_name(struct pipe_screen* pscreen) +{ + struct si_screen *sscreen = (struct si_screen*)pscreen; + + return sscreen->renderer_string; +} + +static float si_get_paramf(struct pipe_screen* pscreen, + enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + case PIPE_CAPF_MAX_POINT_WIDTH: + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 8192.0f; + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 16.0f; + case PIPE_CAPF_GUARD_BAND_LEFT: + case PIPE_CAPF_GUARD_BAND_TOP: + case PIPE_CAPF_GUARD_BAND_RIGHT: + case PIPE_CAPF_GUARD_BAND_BOTTOM: + return 0.0f; + } + return 0.0f; +} + +static unsigned get_max_threads_per_block(struct si_screen *screen, + enum pipe_shader_ir ir_type) +{ + if (ir_type != PIPE_SHADER_IR_TGSI) + return 256; + + /* Only 16 waves per thread-group on gfx9. 
*/ + if (screen->b.chip_class >= GFX9) + return 1024; + + /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice + * round number. + */ + return 2048; +} + +static int si_get_compute_param(struct pipe_screen *screen, + enum pipe_shader_ir ir_type, + enum pipe_compute_cap param, + void *ret) +{ + struct si_screen *sscreen = (struct si_screen *)screen; + + switch (param) { + case PIPE_COMPUTE_CAP_IR_TARGET: { + const char *gpu; + const char *triple; + + if (HAVE_LLVM < 0x0400) + triple = "amdgcn--"; + else + triple = "amdgcn-mesa-mesa3d"; + + gpu = ac_get_llvm_processor_name(sscreen->b.family); + if (ret) { + sprintf(ret, "%s-%s", gpu, triple); + } + /* +2 for dash and terminating NIL byte */ + return (strlen(triple) + strlen(gpu) + 2) * sizeof(char); + } + case PIPE_COMPUTE_CAP_GRID_DIMENSION: + if (ret) { + uint64_t *grid_dimension = ret; + grid_dimension[0] = 3; + } + return 1 * sizeof(uint64_t); + + case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: + if (ret) { + uint64_t *grid_size = ret; + grid_size[0] = 65535; + grid_size[1] = 65535; + grid_size[2] = 65535; + } + return 3 * sizeof(uint64_t) ; + + case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: + if (ret) { + uint64_t *block_size = ret; + unsigned threads_per_block = get_max_threads_per_block(sscreen, ir_type); + block_size[0] = threads_per_block; + block_size[1] = threads_per_block; + block_size[2] = threads_per_block; + } + return 3 * sizeof(uint64_t); + + case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: + if (ret) { + uint64_t *max_threads_per_block = ret; + *max_threads_per_block = get_max_threads_per_block(sscreen, ir_type); + } + return sizeof(uint64_t); + case PIPE_COMPUTE_CAP_ADDRESS_BITS: + if (ret) { + uint32_t *address_bits = ret; + address_bits[0] = 64; + } + return 1 * sizeof(uint32_t); + + case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: + if (ret) { + uint64_t *max_global_size = ret; + uint64_t max_mem_alloc_size; + + si_get_compute_param(screen, ir_type, + PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, + &max_mem_alloc_size); + + /* 
In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least + * 1/4 of the MAX_GLOBAL_SIZE. Since the + * MAX_MEM_ALLOC_SIZE is fixed for older kernels, + * make sure we never report more than + * 4 * MAX_MEM_ALLOC_SIZE. + */ + *max_global_size = MIN2(4 * max_mem_alloc_size, + MAX2(sscreen->b.info.gart_size, + sscreen->b.info.vram_size)); + } + return sizeof(uint64_t); + + case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: + if (ret) { + uint64_t *max_local_size = ret; + /* Value reported by the closed source driver. */ + *max_local_size = 32768; + } + return sizeof(uint64_t); + + case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: + if (ret) { + uint64_t *max_input_size = ret; + /* Value reported by the closed source driver. */ + *max_input_size = 1024; + } + return sizeof(uint64_t); + + case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: + if (ret) { + uint64_t *max_mem_alloc_size = ret; + + *max_mem_alloc_size = sscreen->b.info.max_alloc_size; + } + return sizeof(uint64_t); + + case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: + if (ret) { + uint32_t *max_clock_frequency = ret; + *max_clock_frequency = sscreen->b.info.max_shader_clock; + } + return sizeof(uint32_t); + + case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: + if (ret) { + uint32_t *max_compute_units = ret; + *max_compute_units = sscreen->b.info.num_good_compute_units; + } + return sizeof(uint32_t); + + case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: + if (ret) { + uint32_t *images_supported = ret; + *images_supported = 0; + } + return sizeof(uint32_t); + case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: + break; /* unused */ + case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: + if (ret) { + uint32_t *subgroup_size = ret; + *subgroup_size = 64; + } + return sizeof(uint32_t); + case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: + if (ret) { + uint64_t *max_variable_threads_per_block = ret; + if (ir_type == PIPE_SHADER_IR_TGSI) + *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK; + else + *max_variable_threads_per_block = 0; + } + return sizeof(uint64_t); + } + + 
fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); + return 0; +} + static bool si_have_tgsi_compute(struct si_screen *sscreen) { /* Old kernels disallowed some register writes for SI * that are used for indirect dispatches. */ return (sscreen->b.chip_class >= CIK || sscreen->b.info.drm_major == 3 || (sscreen->b.info.drm_major == 2 && sscreen->b.info.drm_minor >= 45)); } @@ -823,20 +1111,69 @@ static const struct nir_shader_compiler_options nir_options = { static const void * si_get_compiler_options(struct pipe_screen *screen, enum pipe_shader_ir ir, enum pipe_shader_type shader) { assert(ir == PIPE_SHADER_IR_NIR); return &nir_options; } +static uint64_t si_get_timestamp(struct pipe_screen *screen) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + + return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) / + rscreen->info.clock_crystal_freq; +} + +static void si_query_memory_info(struct pipe_screen *screen, + struct pipe_memory_info *info) +{ + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; + struct radeon_winsys *ws = rscreen->ws; + unsigned vram_usage, gtt_usage; + + info->total_device_memory = rscreen->info.vram_size / 1024; + info->total_staging_memory = rscreen->info.gart_size / 1024; + + /* The real TTM memory usage is somewhat random, because: + * + * 1) TTM delays freeing memory, because it can only free it after + * fences expire. + * + * 2) The memory usage can be really low if big VRAM evictions are + * taking place, but the real usage is well above the size of VRAM. + * + * Instead, return statistics of this process. + */ + vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024; + gtt_usage = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024; + + info->avail_device_memory = + vram_usage <= info->total_device_memory ? + info->total_device_memory - vram_usage : 0; + info->avail_staging_memory = + gtt_usage <= info->total_staging_memory ? 
+ info->total_staging_memory - gtt_usage : 0; + + info->device_memory_evicted = + ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024; + + if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4) + info->nr_device_memory_evictions = + ws->query_value(ws, RADEON_NUM_EVICTIONS); + else + /* Just return the number of evicted 64KB pages. */ + info->nr_device_memory_evictions = info->device_memory_evicted / 64; +} + static void si_destroy_screen(struct pipe_screen* pscreen) { struct si_screen *sscreen = (struct si_screen *)pscreen; struct si_shader_part *parts[] = { sscreen->vs_prologs, sscreen->tcs_epilogs, sscreen->gs_prologs, sscreen->ps_prologs, sscreen->ps_epilogs }; @@ -861,20 +1198,21 @@ static void si_destroy_screen(struct pipe_screen* pscreen) while (parts[i]) { struct si_shader_part *part = parts[i]; parts[i] = part->next; si_radeon_shader_binary_clean(&part->binary); FREE(part); } } mtx_destroy(&sscreen->shader_parts_mutex); si_destroy_shader_cache(sscreen); + disk_cache_destroy(sscreen->disk_shader_cache); si_destroy_common_screen(&sscreen->b); } static bool si_init_gs_info(struct si_screen *sscreen) { /* gs_table_depth is not used by GFX9 */ if (sscreen->b.chip_class >= GFX9) return true; switch (sscreen->b.family) { @@ -977,34 +1315,71 @@ static void radeonsi_get_device_uuid(struct pipe_screen *pscreen, char *uuid) { struct r600_common_screen *rscreen = (struct r600_common_screen *)pscreen; ac_compute_device_uuid(&rscreen->info, uuid, PIPE_UUID_SIZE); } struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, const struct pipe_screen_config *config) { struct si_screen *sscreen = CALLOC_STRUCT(si_screen); + char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {}; unsigned num_threads, num_compiler_threads, num_compiler_threads_lowprio, i; + struct utsname uname_data; + const char *chip_name; if (!sscreen) { return NULL; } + + ws->query_info(ws, &sscreen->b.info); + sscreen->b.ws = ws; + + if ((chip_name = 
si_get_marketing_name(ws))) + snprintf(family_name, sizeof(family_name), "%s / ", + si_get_family_name(sscreen) + 4); + else + chip_name = si_get_family_name(sscreen); + + if (uname(&uname_data) == 0) + snprintf(kernel_version, sizeof(kernel_version), + " / %s", uname_data.release); + + if (HAVE_LLVM > 0) { + snprintf(llvm_string, sizeof(llvm_string), + ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff, + HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH); + } + + snprintf(sscreen->renderer_string, sizeof(sscreen->renderer_string), + "%s (%sDRM %i.%i.%i%s%s)", + chip_name, family_name, sscreen->b.info.drm_major, + sscreen->b.info.drm_minor, sscreen->b.info.drm_patchlevel, + kernel_version, llvm_string); + /* Set functions first. */ sscreen->b.b.context_create = si_pipe_create_context; sscreen->b.b.destroy = si_destroy_screen; + sscreen->b.b.get_name = si_get_name; + sscreen->b.b.get_vendor = si_get_vendor; + sscreen->b.b.get_device_vendor = si_get_device_vendor; + sscreen->b.b.get_disk_shader_cache = si_get_disk_shader_cache; + sscreen->b.b.get_compute_param = si_get_compute_param; + sscreen->b.b.get_paramf = si_get_paramf; sscreen->b.b.get_param = si_get_param; sscreen->b.b.get_shader_param = si_get_shader_param; sscreen->b.b.get_compiler_options = si_get_compiler_options; sscreen->b.b.get_device_uuid = radeonsi_get_device_uuid; sscreen->b.b.get_driver_uuid = radeonsi_get_driver_uuid; + sscreen->b.b.get_timestamp = si_get_timestamp; + sscreen->b.b.query_memory_info = si_query_memory_info; sscreen->b.b.resource_create = si_resource_create_common; si_init_screen_fence_functions(sscreen); si_init_screen_state_functions(sscreen); /* Set these flags in debug_flags early, so that the shader cache takes * them into account. 
*/ if (driQueryOptionb(config->options, "glsl_correct_derivatives_after_discard")) @@ -1012,20 +1387,22 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, if (driQueryOptionb(config->options, "radeonsi_enable_sisched")) sscreen->b.debug_flags |= DBG(SI_SCHED); if (!si_common_screen_init(&sscreen->b, ws) || !si_init_gs_info(sscreen) || !si_init_shader_cache(sscreen)) { FREE(sscreen); return NULL; } + si_disk_cache_create(sscreen); + /* Only enable as many threads as we have target machines, but at most * the number of CPUs - 1 if there is more than one. */ num_threads = sysconf(_SC_NPROCESSORS_ONLN); num_threads = MAX2(1, num_threads - 1); num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->tm)); num_compiler_threads_lowprio = MIN2(num_threads, ARRAY_SIZE(sscreen->tm_low_priority)); if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader", @@ -1144,20 +1521,75 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->b.debug_flags |= DBG_ALL_SHADERS; for (i = 0; i < num_compiler_threads; i++) sscreen->tm[i] = si_create_llvm_target_machine(sscreen); for (i = 0; i < num_compiler_threads_lowprio; i++) sscreen->tm_low_priority[i] = si_create_llvm_target_machine(sscreen); /* Create the auxiliary context. This must be done last. 
*/ sscreen->b.aux_context = si_create_context(&sscreen->b.b, 0); + if (sscreen->b.debug_flags & DBG(INFO)) { + printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", + sscreen->b.info.pci_domain, sscreen->b.info.pci_bus, + sscreen->b.info.pci_dev, sscreen->b.info.pci_func); + printf("pci_id = 0x%x\n", sscreen->b.info.pci_id); + printf("family = %i (%s)\n", sscreen->b.info.family, + si_get_family_name(sscreen)); + printf("chip_class = %i\n", sscreen->b.info.chip_class); + printf("pte_fragment_size = %u\n", sscreen->b.info.pte_fragment_size); + printf("gart_page_size = %u\n", sscreen->b.info.gart_page_size); + printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(sscreen->b.info.gart_size, 1024*1024)); + printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(sscreen->b.info.vram_size, 1024*1024)); + printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(sscreen->b.info.vram_vis_size, 1024*1024)); + printf("max_alloc_size = %i MB\n", + (int)DIV_ROUND_UP(sscreen->b.info.max_alloc_size, 1024*1024)); + printf("min_alloc_size = %u\n", sscreen->b.info.min_alloc_size); + printf("has_dedicated_vram = %u\n", sscreen->b.info.has_dedicated_vram); + printf("has_virtual_memory = %i\n", sscreen->b.info.has_virtual_memory); + printf("gfx_ib_pad_with_type2 = %i\n", sscreen->b.info.gfx_ib_pad_with_type2); + printf("has_hw_decode = %u\n", sscreen->b.info.has_hw_decode); + printf("num_sdma_rings = %i\n", sscreen->b.info.num_sdma_rings); + printf("num_compute_rings = %u\n", sscreen->b.info.num_compute_rings); + printf("uvd_fw_version = %u\n", sscreen->b.info.uvd_fw_version); + printf("vce_fw_version = %u\n", sscreen->b.info.vce_fw_version); + printf("me_fw_version = %i\n", sscreen->b.info.me_fw_version); + printf("me_fw_feature = %i\n", sscreen->b.info.me_fw_feature); + printf("pfp_fw_version = %i\n", sscreen->b.info.pfp_fw_version); + printf("pfp_fw_feature = %i\n", sscreen->b.info.pfp_fw_feature); + printf("ce_fw_version = %i\n", sscreen->b.info.ce_fw_version); + printf("ce_fw_feature = %i\n", 
sscreen->b.info.ce_fw_feature); + printf("vce_harvest_config = %i\n", sscreen->b.info.vce_harvest_config); + printf("clock_crystal_freq = %i\n", sscreen->b.info.clock_crystal_freq); + printf("tcc_cache_line_size = %u\n", sscreen->b.info.tcc_cache_line_size); + printf("drm = %i.%i.%i\n", sscreen->b.info.drm_major, + sscreen->b.info.drm_minor, sscreen->b.info.drm_patchlevel); + printf("has_userptr = %i\n", sscreen->b.info.has_userptr); + printf("has_syncobj = %u\n", sscreen->b.info.has_syncobj); + printf("has_sync_file = %u\n", sscreen->b.info.has_sync_file); + + printf("r600_max_quad_pipes = %i\n", sscreen->b.info.r600_max_quad_pipes); + printf("max_shader_clock = %i\n", sscreen->b.info.max_shader_clock); + printf("num_good_compute_units = %i\n", sscreen->b.info.num_good_compute_units); + printf("max_se = %i\n", sscreen->b.info.max_se); + printf("max_sh_per_se = %i\n", sscreen->b.info.max_sh_per_se); + + printf("r600_gb_backend_map = %i\n", sscreen->b.info.r600_gb_backend_map); + printf("r600_gb_backend_map_valid = %i\n", sscreen->b.info.r600_gb_backend_map_valid); + printf("r600_num_banks = %i\n", sscreen->b.info.r600_num_banks); + printf("num_render_backends = %i\n", sscreen->b.info.num_render_backends); + printf("num_tile_pipes = %i\n", sscreen->b.info.num_tile_pipes); + printf("pipe_interleave_bytes = %i\n", sscreen->b.info.pipe_interleave_bytes); + printf("enabled_rb_mask = 0x%x\n", sscreen->b.info.enabled_rb_mask); + printf("max_alignment = %u\n", (unsigned)sscreen->b.info.max_alignment); + } + if (sscreen->b.debug_flags & DBG(TEST_DMA)) si_test_dma(&sscreen->b); if (sscreen->b.debug_flags & (DBG(TEST_VMFAULT_CP) | DBG(TEST_VMFAULT_SDMA) | DBG(TEST_VMFAULT_SHADER))) si_test_vmfault(sscreen); return &sscreen->b.b; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 751441df1bc..a66f9da8658 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -99,20 +99,22 @@ struct 
si_screen { bool has_msaa_sample_loc_bug; bool has_ls_vgpr_init_bug; bool dpbb_allowed; bool dfsm_allowed; bool llvm_has_working_vgpr_indexing; /* Whether shaders are monolithic (1-part) or separate (3-part). */ bool use_monolithic_shaders; bool record_llvm_ir; + struct disk_cache *disk_shader_cache; + mtx_t shader_parts_mutex; struct si_shader_part *vs_prologs; struct si_shader_part *tcs_epilogs; struct si_shader_part *gs_prologs; struct si_shader_part *ps_prologs; struct si_shader_part *ps_epilogs; /* Shader cache in memory. * * Design & limitations: @@ -132,20 +134,22 @@ struct si_screen { struct util_queue shader_compiler_queue; /* Use at most 3 normal compiler threads on quadcore and better. * Hyperthreaded CPUs report the number of threads, but we want * the number of cores. */ LLVMTargetMachineRef tm[3]; /* used by the queue only */ struct util_queue shader_compiler_queue_low_priority; /* Use at most 2 low priority threads on quadcore and better. * We want to minimize the impact on multithreaded Mesa. 
*/ LLVMTargetMachineRef tm_low_priority[2]; /* at most 2 threads */ + + char renderer_string[100]; }; struct si_blend_color { struct r600_atom atom; struct pipe_blend_color state; bool any_nonzeros; }; struct si_sampler_view { struct pipe_sampler_view base; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 3edc340f01f..e1c70aaea26 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -199,61 +199,61 @@ static bool si_shader_cache_insert_shader(struct si_screen *sscreen, hw_binary = si_get_shader_binary(shader); if (!hw_binary) return false; if (_mesa_hash_table_insert(sscreen->shader_cache, tgsi_binary, hw_binary) == NULL) { FREE(hw_binary); return false; } - if (sscreen->b.disk_shader_cache && insert_into_disk_cache) { - disk_cache_compute_key(sscreen->b.disk_shader_cache, tgsi_binary, + if (sscreen->disk_shader_cache && insert_into_disk_cache) { + disk_cache_compute_key(sscreen->disk_shader_cache, tgsi_binary, *((uint32_t *)tgsi_binary), key); - disk_cache_put(sscreen->b.disk_shader_cache, key, hw_binary, + disk_cache_put(sscreen->disk_shader_cache, key, hw_binary, *((uint32_t *) hw_binary), NULL); } return true; } static bool si_shader_cache_load_shader(struct si_screen *sscreen, void *tgsi_binary, struct si_shader *shader) { struct hash_entry *entry = _mesa_hash_table_search(sscreen->shader_cache, tgsi_binary); if (!entry) { - if (sscreen->b.disk_shader_cache) { + if (sscreen->disk_shader_cache) { unsigned char sha1[CACHE_KEY_SIZE]; size_t tg_size = *((uint32_t *) tgsi_binary); - disk_cache_compute_key(sscreen->b.disk_shader_cache, + disk_cache_compute_key(sscreen->disk_shader_cache, tgsi_binary, tg_size, sha1); size_t binary_size; uint8_t *buffer = - disk_cache_get(sscreen->b.disk_shader_cache, + disk_cache_get(sscreen->disk_shader_cache, sha1, &binary_size); if (!buffer) return false; if (binary_size < sizeof(uint32_t) || 
*((uint32_t*)buffer) != binary_size) { /* Something has gone wrong discard the item * from the cache and rebuild/link from * source. */ assert(!"Invalid radeonsi shader disk cache " "item!"); - disk_cache_remove(sscreen->b.disk_shader_cache, + disk_cache_remove(sscreen->disk_shader_cache, sha1); free(buffer); return false; } if (!si_load_shader_binary(shader, buffer)) { free(buffer); return false; } -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev