On Saturday, September 24, 2016 3:24:53 PM PDT Timothy Arceri wrote: > This uses the recently-added cache.c to write out the final linked > binary for vertex and fragment shader programs. > > This is based off the initial implementation done by Carl. > --- > src/mesa/drivers/dri/i965/Makefile.sources | 1 + > src/mesa/drivers/dri/i965/brw_shader_cache.c | 390 > +++++++++++++++++++++++++++ > src/mesa/drivers/dri/i965/brw_state.h | 7 + > 3 files changed, 398 insertions(+) > create mode 100644 src/mesa/drivers/dri/i965/brw_shader_cache.c > > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources > b/src/mesa/drivers/dri/i965/Makefile.sources > index df90cb4..bd2bd37 100644 > --- a/src/mesa/drivers/dri/i965/Makefile.sources > +++ b/src/mesa/drivers/dri/i965/Makefile.sources > @@ -147,6 +147,7 @@ i965_FILES = \ > brw_sf_emit.c \ > brw_sf.h \ > brw_sf_state.c \ > + brw_shader_cache.cpp \ > brw_state_batch.c \ > brw_state_cache.c \ > brw_state_dump.c \ > diff --git a/src/mesa/drivers/dri/i965/brw_shader_cache.c > b/src/mesa/drivers/dri/i965/brw_shader_cache.c > new file mode 100644 > index 0000000..aba45b6 > --- /dev/null > +++ b/src/mesa/drivers/dri/i965/brw_shader_cache.c > @@ -0,0 +1,390 @@ > +/* > + * Copyright © 2014 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. 
> + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +#include <util/macros.h> > +#include <util/mesa-sha1.h> > +#include <main/mtypes.h> > +#include <compiler/glsl/glsl_parser_extras.h> > +#include <compiler/glsl/ir_uniform.h> > +#include <compiler/glsl/cache.h> > +#include <compiler/glsl/blob.h> > + > +#include "brw_state.h" > +#include "brw_wm.h" > +#include "brw_vs.h" > +#include "brw_context.h" > + > +static void > +gen_vs_sha1(struct brw_context *brw, struct gl_shader_program *prog, > + struct brw_vs_prog_key *vs_key, unsigned char *vs_sha1) > +{ > + char sha1_buf[41]; > + unsigned char sha1[20]; > + char manifest[256]; > + int offset = 0; > + > + offset += snprintf(manifest, sizeof(manifest), "program: %s\n", > + _mesa_sha1_format(sha1_buf, prog->sha1)); > + > + _mesa_sha1_compute(vs_key, sizeof *vs_key, sha1); > + offset += snprintf(manifest + offset, sizeof(manifest) - offset, > + "vs_key: %s\n", _mesa_sha1_format(sha1_buf, sha1)); > + > + _mesa_sha1_compute(manifest, strlen(manifest), vs_sha1); > +}
The VS/TCS/TES/GS code is basically identical...you could avoid a lot of duplication by doing... static void gen_shader_sha1(struct brw_context *brw, struct gl_shader_program *prog, unsigned stage, void *key, unsigned char *out_sha1) { char sha1_buf[41]; unsigned char sha1[20]; char manifest[256]; int offset = 0; format_program_sha1(prog, manifest, sizeof(manifest), &offset); _mesa_sha1_compute(key, key_size(stage), sha1); offset += snprintf(manifest + offset, sizeof(manifest) - offset, "%s_key: %s\n", _mesa_shader_stage_to_abbrev(stage), _mesa_sha1_format(sha1_buf, sha1)); _mesa_sha1_compute(manifest, strlen(manifest), out_sha1); } assuming you move the key initialization for TCS/TES/GS to the caller, which would make it more consistent with the VS anyway. (Here, key_size is a helper function that returns sizeof(struct brw_vs_prog_key) etc.) (Also assuming you're OK with using "VS_key" rather than "vs_key"...) > + > +static void > +gen_wm_sha1(struct brw_context *brw, struct gl_shader_program *prog, > + struct brw_vs_prog_key *vs_key, struct brw_wm_prog_key *wm_key, > + unsigned char *wm_sha1) > +{ > + char sha1_buf[41]; > + unsigned char sha1[20]; > + char manifest[256]; > + int offset = 0; > + > + offset += snprintf(manifest, sizeof(manifest), "program: %s\n", > + _mesa_sha1_format(sha1_buf, prog->sha1)); > + > + brw_wm_populate_key(brw, wm_key); > + _mesa_sha1_compute(wm_key, sizeof *wm_key, sha1); > + offset += snprintf(manifest + offset, sizeof(manifest) - offset, > + "wm_key: %s\n", _mesa_sha1_format(sha1_buf, sha1)); > + > + _mesa_sha1_compute(manifest, strlen(manifest), wm_sha1); > + > +} I don't know why this function is (eventually) monkeying around with the VUE map coming out of the GS stage based on VS outputs written. I can't imagine it works once you're caching TES/GS... 
> + > +static void > +load_program_data(struct gl_shader_program *prog, struct blob_reader *binary, > + struct brw_stage_prog_data *prog_data, > + gl_shader_stage stage, struct gl_context *ctx) > +{ > + static const gl_constant_value zero = { 0 }; > + > + intptr_t parameter_values_base = blob_read_intptr(binary); > + intptr_t uniform_data_slots_base = blob_read_intptr(binary); > + > + uint32_t nr_params = blob_read_uint32(binary); > + assert(nr_params == prog_data->nr_params); > + > + prog_data->param = rzalloc_array(NULL, const gl_constant_value *, > + nr_params); > + if (ctx->_Shader->Flags & GLSL_CACHE_INFO) { > + fprintf(stderr, "Allocating %d prog_data->params (%p)\n", > + prog_data->nr_params, prog_data->param); > + } > + > + for (unsigned i = 0; i < nr_params; i++) { > + intptr_t param = blob_read_intptr(binary); > + ptrdiff_t p_offset, u_offset; > + struct gl_program_parameter_list *param_list = > + prog->_LinkedShaders[stage]->Program->Parameters; > + > + p_offset = (param - parameter_values_base) / sizeof(gl_constant_value); > + u_offset = (param - uniform_data_slots_base) / > sizeof(gl_constant_value); > + > + if (p_offset >= 0 && p_offset < 4 * param_list->NumParameters) { > + prog_data->param[i] = > + ((gl_constant_value *) param_list->ParameterValues) + p_offset; > + } else if (u_offset >= 0 && u_offset < prog->NumUniformDataSlots) { > + prog_data->param[i] = prog->UniformDataSlots + u_offset; > + } else { > + prog_data->param[i] = &zero; > + } > + } > + > + uint32_t nr_pull_params = blob_read_uint32(binary); > + assert(nr_pull_params == prog_data->nr_pull_params); > + > + prog_data->pull_param = rzalloc_array(NULL, const gl_constant_value *, > + nr_pull_params); > + > + for (unsigned i = 0; i < nr_pull_params; i++) { > + intptr_t pull_param = blob_read_intptr(binary); > + /* FIXME: We need to fixup pull_params pointers here. 
*/ > + } > + > +} > + > +static void > +upload_cached_vs(struct brw_context *brw, struct blob_reader *binary, > + struct gl_shader_program *prog, > + struct brw_vs_prog_key *vs_key) > +{ > + struct brw_vs_prog_data *vs_prog_data; > + struct brw_stage_prog_data *prog_data; > + > + /* Read VS program from blob. */ > + size_t vs_program_size = blob_read_uint32(binary); > + uint8_t *vs_program = blob_read_bytes(binary, vs_program_size); > + > + /* Read VS program_data from blob and fixup params pointers. */ > + size_t vs_prog_data_size = blob_read_uint32(binary); > + assert(vs_prog_data_size == sizeof *vs_prog_data); > + > + vs_prog_data = blob_read_bytes(binary, vs_prog_data_size); > + prog_data = &vs_prog_data->base.base; > + > + load_program_data(prog, binary, prog_data, MESA_SHADER_VERTEX, &brw->ctx); > + > + struct brw_vertex_program *vp = > + (struct brw_vertex_program *)brw->vertex_program; > + brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG, > + vs_key, sizeof(struct brw_vs_prog_key), > + vs_program, vs_program_size, > + vs_prog_data, vs_prog_data_size, > + &brw->vs.base.prog_offset, &brw->vs.prog_data, vp); > +} > + > +static void > +upload_cached_wm(struct brw_context *brw, struct blob_reader *binary, > + struct gl_shader_program *prog, > + struct brw_wm_prog_key *wm_key) > +{ > + struct brw_wm_prog_data *wm_prog_data; > + struct brw_stage_prog_data *prog_data; > + > + /* Read WM program from blob. */ > + size_t wm_program_size = blob_read_uint32(binary); > + uint8_t *wm_program = blob_read_bytes(binary, wm_program_size); > + > + /* Read WM program_data from blob and fixup params pointers. 
*/ > + size_t wm_prog_data_size = blob_read_uint32(binary); > + assert(wm_prog_data_size == sizeof *wm_prog_data); > + > + wm_prog_data = blob_read_bytes(binary, wm_prog_data_size); > + prog_data = &wm_prog_data->base; > + > + load_program_data(prog, binary, prog_data, MESA_SHADER_FRAGMENT, > + &brw->ctx); > + > + struct brw_fragment_program *wp = > + (struct brw_fragment_program *)brw->fragment_program; > + brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG, > + wm_key, sizeof(struct brw_wm_prog_key), > + wm_program, wm_program_size, > + wm_prog_data, wm_prog_data_size, > + &brw->wm.base.prog_offset, &brw->wm.prog_data, wp); > +} It seems like you could do a similar treatment here...again, upload_cached_{vs,tcs,tes,gs,wm} are basically identical...making a more general function might even allow you to drop some switches in the caller... > + > +void > +upload_cached_program(struct brw_context *brw, gl_shader_stage stage) > +{ > + char sha1_buf[41]; > + unsigned char binary_sha1[20]; > + size_t size; > + uint8_t *buffer; > + struct blob_reader binary; > + struct gl_shader_program *prog; > + struct brw_wm_prog_key wm_key; > + struct brw_vs_prog_key vs_key; > + > + struct program_cache *cache = brw->ctx.Cache; > + if (cache == NULL) > + return; > + > + prog = brw->ctx.Shader.ActiveProgram; > + if (prog == NULL) > + return; > + > + brw_vs_populate_key(brw, &vs_key); > + switch (stage) { > + case MESA_SHADER_VERTEX: > + gen_vs_sha1(brw, prog, &vs_key, binary_sha1); > + break; > + case MESA_SHADER_FRAGMENT: > + gen_wm_sha1(brw, prog, &vs_key, &wm_key, binary_sha1); > + break; > + } > + > + buffer = cache_get(cache, binary_sha1, &size); > + if (buffer == NULL) > + goto FAIL; > + > + if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { > + fprintf(stderr, "attempting to populate bo cache with binary: %s\n", > + _mesa_sha1_format(sha1_buf, binary_sha1)); > + } > + > + blob_reader_init(&binary, buffer, size); > + > + switch (stage) { > + case MESA_SHADER_VERTEX: > + 
upload_cached_vs(brw, &binary, prog, &vs_key); > + break; > + case MESA_SHADER_FRAGMENT: > + upload_cached_wm(brw, &binary, prog, &wm_key); > + break; > + } > + > + if (binary.current != binary.end || binary.overrun) { > + if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { > + fprintf(stderr, "Error reading program from cache (did not read " > + "every byte written)\n"); > + } > + goto FAIL; > + } > + > + if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { > + fprintf(stderr, "%s: Successfully read every byte written!\n", > + __FUNCTION__); > + } > + prog->program_written_to_cache = true; > + > + free(buffer); > + return; > + > +FAIL: > + /*FIXME: Fall back and compile from source here. */ > + prog->program_written_to_cache = false; > + free(buffer); > +} > + > +static void > +write_program_data(struct gl_shader_program *prog, struct blob *binary, > + struct brw_stage_prog_data *prog_data, > + gl_shader_stage stage) > +{ > + /* Include variable-length params from end of brw_stage_prog_data as well. > + * > + * Before writing either of the params or pull_params arrays, we first > + * write out the addresses of the ParameterValues and UniformDataSlots > + * storage. The pointers within params will be pointers to within one of > + * these blocks of storage. So we can use the addresses of this storage > + * together with the pointer values to correctly construct pointers to the > + * actual storage when the program data is loaded from the cache. 
> + */ > + blob_write_intptr(binary, > + (intptr_t) prog->_LinkedShaders[stage]-> > + Program->Parameters->ParameterValues); > + > + blob_write_intptr(binary, (intptr_t) prog->UniformDataSlots); > + > + blob_write_uint32(binary, prog_data->nr_params); > + > + for (unsigned i = 0; i < prog_data->nr_params; i++) { > + blob_write_intptr(binary, (intptr_t) prog_data->param[i]); > + } > + > + blob_write_uint32(binary, prog_data->nr_pull_params); > + for (unsigned i = 0; i < prog_data->nr_pull_params; i++) { > + blob_write_intptr(binary, (intptr_t) prog_data->pull_param[i]); > + } > +} > + > +void > +write_cached_program(struct brw_context *brw) > +{ > + struct blob *binary; > + uint8_t *blob_cursor; > + size_t program_size; > + struct gl_shader_program *prog; > + struct program_cache *cache; > + char buf[41]; > + > + cache = brw->ctx.Cache; > + if (cache == NULL) > + return; > + > + prog = brw->ctx.Shader.ActiveProgram; > + if (prog == NULL) > + return; > + > + if (prog->program_written_to_cache) > + return; > + > + struct brw_vs_prog_key vs_key; > + brw_vs_populate_key(brw, &vs_key); > + > + if (prog->_LinkedShaders[MESA_SHADER_VERTEX]) { > + unsigned char vs_sha1[20]; > + > + binary = blob_create (NULL); > + if (binary == NULL) > + return; > + > + gen_vs_sha1(brw, prog, &vs_key, vs_sha1); > + > + /* Write VS program to blob. */ > + program_size = brw->vs.prog_data->program_size; > + > + blob_write_uint32(binary, program_size); > + > + blob_cursor = blob_reserve_bytes(binary, program_size); > + drm_intel_bo_get_subdata(brw->cache.bo, brw->vs.base.prog_offset, > + program_size, blob_cursor); > + > + /* Write VS program_data to blob. 
*/ > + blob_write_uint32(binary, sizeof *brw->vs.prog_data); > + blob_write_bytes(binary, brw->vs.prog_data, sizeof *brw->vs.prog_data); > + > + write_program_data(prog, binary, &brw->vs.prog_data->base.base, > + MESA_SHADER_VERTEX); > + > + if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { > + fprintf(stderr, "putting binary in cache: %s\n", > + _mesa_sha1_format(buf, vs_sha1)); > + } > + > + cache_put(cache, vs_sha1, binary->data, binary->size); > + ralloc_free (binary); > + } > + > + if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) { > + struct brw_wm_prog_key wm_key; > + unsigned char wm_sha1[20]; > + > + binary = blob_create (NULL); > + if (binary == NULL) > + return; > + > + gen_wm_sha1(brw, prog, &vs_key, &wm_key, wm_sha1); > + > + /* Write WM program to blob. */ > + program_size = brw->wm.prog_data->program_size; > + > + blob_write_uint32(binary, program_size); > + > + blob_cursor = blob_reserve_bytes(binary, program_size); > + drm_intel_bo_get_subdata(brw->cache.bo, brw->wm.base.prog_offset, > + program_size, blob_cursor); > + > + /* Write WM program_data to blob. */ > + blob_write_uint32(binary, sizeof *brw->wm.prog_data); > + blob_write_bytes(binary, brw->wm.prog_data, sizeof *brw->wm.prog_data); > + > + write_program_data(prog, binary, &brw->wm.prog_data->base, > + MESA_SHADER_FRAGMENT); > + > + if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) { > + fprintf(stderr, "putting binary in cache: %s\n", > + _mesa_sha1_format(buf, wm_sha1)); > + } > + > + cache_put(cache, wm_sha1, binary->data, binary->size); > + ralloc_free (binary); > + } Likewise...this code is screaming for a helper function. It's basically five copies of the same thing... 
> + > + prog->program_written_to_cache = true; > +} > diff --git a/src/mesa/drivers/dri/i965/brw_state.h > b/src/mesa/drivers/dri/i965/brw_state.h > index aba9508..2a11a55 100644 > --- a/src/mesa/drivers/dri/i965/brw_state.h > +++ b/src/mesa/drivers/dri/i965/brw_state.h > @@ -196,6 +196,13 @@ void brw_upload_state_base_address(struct brw_context > *brw); > void gen8_write_pma_stall_bits(struct brw_context *brw, > uint32_t pma_stall_bits); > > +/* brw_shader_cache.h */ > +void > +upload_cached_program(struct brw_context *brw, gl_shader_stage stage); > + > +void > +write_cached_program(struct brw_context *brw); > + > /*********************************************************************** > * brw_state.c > */ >
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev