[Mesa-dev] [PATCH 05/12] nir: Remove linker_error calls from nir_lower_samplers().
These should never happen. Plus, NIR passes really shouldn't be reporting linker errors - this is past link time. Signed-off-by: Kenneth Graunke --- src/glsl/nir/nir_lower_samplers.cpp | 9 ++--- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp index 7a7cf85..90e023a 100644 --- a/src/glsl/nir/nir_lower_samplers.cpp +++ b/src/glsl/nir/nir_lower_samplers.cpp @@ -41,17 +41,12 @@ get_sampler_index(struct gl_shader_program *shader_program, { unsigned location; if (!shader_program->UniformHash->get(location, name)) { - linker_error(shader_program, - "failed to find sampler named %s.\n", name); + assert(!"failed to find sampler"); return 0; } if (!shader_program->UniformStorage[location].sampler[stage].active) { - assert(0 && "cannot return a sampler"); - linker_error(shader_program, - "cannot return a sampler named %s, because it is not " - "used in this shader stage. This is a driver bug.\n", - name); + assert(!"cannot return a sampler"); return 0; } -- 2.3.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 01/12] nir: Constify prog_to_nir's gl_program pointer.
prog_to_nir should not modify the incoming Mesa IR program - just translate it. Signed-off-by: Kenneth Graunke --- src/mesa/program/prog_to_nir.c | 4 ++-- src/mesa/program/prog_to_nir.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index b298d07..c738f50 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -43,7 +43,7 @@ */ struct ptn_compile { - struct gl_program *prog; + const struct gl_program *prog; nir_builder build; bool error; @@ -1052,7 +1052,7 @@ setup_registers_and_variables(struct ptn_compile *c) } struct nir_shader * -prog_to_nir(struct gl_program *prog, const nir_shader_compiler_options *options) +prog_to_nir(const struct gl_program *prog, const nir_shader_compiler_options *options) { struct ptn_compile *c; struct nir_shader *s; diff --git a/src/mesa/program/prog_to_nir.h b/src/mesa/program/prog_to_nir.h index 3c9b664..34e4cd1 100644 --- a/src/mesa/program/prog_to_nir.h +++ b/src/mesa/program/prog_to_nir.h @@ -28,7 +28,7 @@ extern "C" { #endif -struct nir_shader *prog_to_nir(struct gl_program *prog, +struct nir_shader *prog_to_nir(const struct gl_program *prog, const nir_shader_compiler_options *options); #ifdef __cplusplus -- 2.3.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 12/12] i965/nir: Make INTEL_DEBUG=ann work with NIR.
Now that we store a copy of the NIR shader, and don't immediately free it, we can use it in annotations as well. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 4 src/mesa/drivers/dri/i965/intel_asm_annotation.c | 5 - 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index ccffd5d..b067735 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -438,6 +438,8 @@ fs_visitor::nir_emit_block(nir_block *block) void fs_visitor::nir_emit_instr(nir_instr *instr) { + this->base_ir = instr; + switch (instr->type) { case nir_instr_type_alu: nir_emit_alu(nir_instr_as_alu(instr)); @@ -464,6 +466,8 @@ fs_visitor::nir_emit_instr(nir_instr *instr) default: unreachable("unknown instruction type"); } + + this->base_ir = NULL; } static brw_reg_type diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.c b/src/mesa/drivers/dri/i965/intel_asm_annotation.c index ac12655..eed5756 100644 --- a/src/mesa/drivers/dri/i965/intel_asm_annotation.c +++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.c @@ -29,6 +29,7 @@ #include "program/prog_print.h" #include "program/prog_instruction.h" #include "main/macros.h" +#include "glsl/nir/nir.h" void dump_assembly(void *assembly, int num_annotations, struct annotation *annotation, @@ -55,7 +56,9 @@ dump_assembly(void *assembly, int num_annotations, struct annotation *annotation last_annotation_ir = annotation[i].ir; if (last_annotation_ir) { fprintf(stderr, " "); -if (!prog->Instructions) +if (prog->nir) + nir_print_instr(annotation[i].ir, stderr); +else if (!prog->Instructions) fprint_ir(stderr, annotation[i].ir); else { const struct prog_instruction *pi = -- 2.3.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 07/12] i965: Change brw_shader to gl_shader in brw_link_shader().
Nothing actually wanted brw_shader fields - we just had to type shader->base all over the place for no reason. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_shader.cpp | 63 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 54d6d71..9fad02c 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -129,15 +129,14 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) { const struct gl_shader_compiler_options *options = &ctx->Const.ShaderCompilerOptions[stage]; - struct brw_shader *shader = -(struct brw_shader *)shProg->_LinkedShaders[stage]; + struct gl_shader *shader = shProg->_LinkedShaders[stage]; if (!shader) continue; struct gl_program *prog = ctx->Driver.NewProgram(ctx, _mesa_shader_stage_to_program(stage), -shader->base.Name); +shader->Name); if (!prog) return false; prog->Parameters = _mesa_new_parameter_list(); @@ -147,19 +146,19 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) /* Temporary memory context for any new IR. */ void *mem_ctx = ralloc_context(NULL); - ralloc_adopt(mem_ctx, shader->base.ir); + ralloc_adopt(mem_ctx, shader->ir); bool progress; /* lower_packing_builtins() inserts arithmetic instructions, so it * must precede lower_instructions(). */ - brw_lower_packing_builtins(brw, (gl_shader_stage) stage, shader->base.ir); - do_mat_op_to_vec(shader->base.ir); + brw_lower_packing_builtins(brw, (gl_shader_stage) stage, shader->ir); + do_mat_op_to_vec(shader->ir); const int bitfield_insert = brw->gen >= 7 ? 
BITFIELD_INSERT_TO_BFM_BFI : 0; - lower_instructions(shader->base.ir, + lower_instructions(shader->ir, MOD_TO_FLOOR | DIV_TO_MUL_RCP | SUB_TO_ADD_NEG | @@ -172,21 +171,21 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) * if-statements need to be flattened. */ if (brw->gen < 6) -lower_if_to_cond_assign(shader->base.ir, 16); +lower_if_to_cond_assign(shader->ir, 16); - do_lower_texture_projection(shader->base.ir); - brw_lower_texture_gradients(brw, shader->base.ir); - do_vec_index_to_cond_assign(shader->base.ir); - lower_vector_insert(shader->base.ir, true); + do_lower_texture_projection(shader->ir); + brw_lower_texture_gradients(brw, shader->ir); + do_vec_index_to_cond_assign(shader->ir); + lower_vector_insert(shader->ir, true); if (options->NirOptions == NULL) - brw_do_cubemap_normalize(shader->base.ir); - lower_offset_arrays(shader->base.ir); - brw_do_lower_unnormalized_offset(shader->base.ir); - lower_noise(shader->base.ir); - lower_quadop_vector(shader->base.ir, false); + brw_do_cubemap_normalize(shader->ir); + lower_offset_arrays(shader->ir); + brw_do_lower_unnormalized_offset(shader->ir); + lower_noise(shader->ir); + lower_quadop_vector(shader->ir, false); bool lowered_variable_indexing = - lower_variable_index_to_cond_assign(shader->base.ir, + lower_variable_index_to_cond_assign(shader->ir, options->EmitNoIndirectInput, options->EmitNoIndirectOutput, options->EmitNoIndirectTemp, @@ -197,23 +196,23 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) "back to very inefficient code generation\n"); } - lower_ubo_reference(&shader->base, shader->base.ir); + lower_ubo_reference(shader, shader->ir); do { progress = false; if (is_scalar_shader_stage(brw, stage)) { - brw_do_channel_expressions(shader->base.ir); - brw_do_vector_splitting(shader->base.ir); + brw_do_channel_expressions(shader->ir); + brw_do_vector_splitting(shader->ir); } -progress = do_lower_jumps(shader->base.ir, true, true, +progress = 
do_lower_jumps(shader->ir, true, true, true, /* main return */ false, /* continue */ false /* loops */ ) || progress; -progress = do_common_optimization(shader->base.ir, true, true, +progress = do_common_optimization(shader->ir, tr
[Mesa-dev] [PATCH 06/12] nir: Constify nir_lower_sampler's gl_shader_program pointer.
Now that we're not generating linker errors, we don't actually modify this. Signed-off-by: Kenneth Graunke --- src/glsl/nir/nir.h | 2 +- src/glsl/nir/nir_lower_samplers.cpp | 10 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 679911c..e844e4d 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1611,7 +1611,7 @@ void nir_lower_alu_to_scalar(nir_shader *shader); void nir_lower_phis_to_scalar(nir_shader *shader); void nir_lower_samplers(nir_shader *shader, -struct gl_shader_program *shader_program, +const struct gl_shader_program *shader_program, gl_shader_stage stage); void nir_lower_system_values(nir_shader *shader); diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp index 90e023a..cf8ab83 100644 --- a/src/glsl/nir/nir_lower_samplers.cpp +++ b/src/glsl/nir/nir_lower_samplers.cpp @@ -36,7 +36,7 @@ extern "C" { } static unsigned -get_sampler_index(struct gl_shader_program *shader_program, +get_sampler_index(const struct gl_shader_program *shader_program, gl_shader_stage stage, const char *name) { unsigned location; @@ -54,7 +54,7 @@ get_sampler_index(struct gl_shader_program *shader_program, } static void -lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, +lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program, gl_shader_stage stage, void *mem_ctx) { if (instr->sampler == NULL) @@ -133,7 +133,7 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, typedef struct { void *mem_ctx; - struct gl_shader_program *shader_program; + const struct gl_shader_program *shader_program; gl_shader_stage stage; } lower_state; @@ -154,7 +154,7 @@ lower_block_cb(nir_block *block, void *_state) } static void -lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program, +lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program, gl_shader_stage stage) { 
lower_state state; @@ -167,7 +167,7 @@ lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program, } extern "C" void -nir_lower_samplers(nir_shader *shader, struct gl_shader_program *shader_program, +nir_lower_samplers(nir_shader *shader, const struct gl_shader_program *shader_program, gl_shader_stage stage) { nir_foreach_overload(shader, overload) { -- 2.3.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/12] nir: Store num_direct_uniforms in the nir_shader.
Storing this here is pretty sketchy - I don't know if any driver other than i965 will want to use it. But this will make it a lot easier to generate NIR code at link time. We'll probably rework it anyway. Signed-off-by: Kenneth Graunke --- src/glsl/nir/nir.h | 3 +++ src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index e844e4d..7d11996 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1429,6 +1429,9 @@ typedef struct nir_shader { * access plus one */ unsigned num_inputs, num_uniforms, num_outputs; + + /** the number of uniforms that are only accessed directly */ + unsigned num_direct_uniforms; } nir_shader; #define nir_foreach_overload(shader, overload)\ diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 145a447..034b79a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -120,7 +120,7 @@ fs_visitor::emit_nir_code() if (shader_prog) { nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms, - &num_direct_uniforms, + &nir->num_direct_uniforms, &nir->num_uniforms); } else { /* ARB programs generally create a giant array of "uniform" data, and allow @@ -128,7 +128,7 @@ fs_visitor::emit_nir_code() * analysis, it's all or nothing. num_direct_uniforms is only useful when * we have some direct and some indirect access; it doesn't matter here. */ - num_direct_uniforms = 0; + nir->num_direct_uniforms = 0; } nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs); nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs); @@ -343,6 +343,7 @@ void fs_visitor::nir_setup_uniforms(nir_shader *shader) { uniforms = shader->num_uniforms; + num_direct_uniforms = shader->num_direct_uniforms; /* We split the uniform register file in half. The first half is * entirely direct uniforms. The second half is indirect. 
-- 2.3.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 08/12] i965: Move brw_link_shader's GLSL IR transformations into a helper.
This function was getting a bit large and unwieldy. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_shader.cpp | 192 --- 1 file changed, 99 insertions(+), 93 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 9fad02c..bf9aceb 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -120,6 +120,104 @@ brw_lower_packing_builtins(struct brw_context *brw, lower_packing_builtins(ir, ops); } +static void +process_glsl_ir(struct brw_context *brw, +struct gl_shader_program *shader_prog, +struct gl_shader *shader) +{ + struct gl_context *ctx = &brw->ctx; + const struct gl_shader_compiler_options *options = + &ctx->Const.ShaderCompilerOptions[shader->Stage]; + + /* Temporary memory context for any new IR. */ + void *mem_ctx = ralloc_context(NULL); + + ralloc_adopt(mem_ctx, shader->ir); + + /* lower_packing_builtins() inserts arithmetic instructions, so it +* must precede lower_instructions(). +*/ + brw_lower_packing_builtins(brw, shader->Stage, shader->ir); + do_mat_op_to_vec(shader->ir); + const int bitfield_insert = brw->gen >= 7 ? BITFIELD_INSERT_TO_BFM_BFI : 0; + lower_instructions(shader->ir, + MOD_TO_FLOOR | + DIV_TO_MUL_RCP | + SUB_TO_ADD_NEG | + EXP_TO_EXP2 | + LOG_TO_LOG2 | + bitfield_insert | + LDEXP_TO_ARITH); + + /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, +* if-statements need to be flattened. 
+*/ + if (brw->gen < 6) + lower_if_to_cond_assign(shader->ir, 16); + + do_lower_texture_projection(shader->ir); + brw_lower_texture_gradients(brw, shader->ir); + do_vec_index_to_cond_assign(shader->ir); + lower_vector_insert(shader->ir, true); + if (options->NirOptions == NULL) + brw_do_cubemap_normalize(shader->ir); + lower_offset_arrays(shader->ir); + brw_do_lower_unnormalized_offset(shader->ir); + lower_noise(shader->ir); + lower_quadop_vector(shader->ir, false); + + bool lowered_variable_indexing = + lower_variable_index_to_cond_assign(shader->ir, + options->EmitNoIndirectInput, + options->EmitNoIndirectOutput, + options->EmitNoIndirectTemp, + options->EmitNoIndirectUniform); + + if (unlikely(brw->perf_debug && lowered_variable_indexing)) { + perf_debug("Unsupported form of variable indexing in FS; falling " + "back to very inefficient code generation\n"); + } + + lower_ubo_reference(shader, shader->ir); + + bool progress; + do { + progress = false; + + if (is_scalar_shader_stage(brw, shader->Stage)) { + brw_do_channel_expressions(shader->ir); + brw_do_vector_splitting(shader->ir); + } + + progress = do_lower_jumps(shader->ir, true, true, +true, /* main return */ +false, /* continue */ +false /* loops */ +) || progress; + + progress = do_common_optimization(shader->ir, true, true, +options, ctx->Const.NativeIntegers) || progress; + } while (progress); + + validate_ir_tree(shader->ir); + + /* Now that we've finished altering the linked IR, reparent any live IR back +* to the permanent memory context, and free the temporary one (discarding any +* junk we optimized away). 
+*/ + reparent_ir(shader->ir, shader->ir); + ralloc_free(mem_ctx); + + if (ctx->_Shader->Flags & GLSL_DUMP) { + fprintf(stderr, "\n"); + fprintf(stderr, "GLSL IR for linked %s program %d:\n", + _mesa_shader_stage_to_string(shader->Stage), + shader_prog->Name); + _mesa_print_ir(stderr, shader->ir, NULL); + fprintf(stderr, "\n"); + } +} + GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) { @@ -127,8 +225,6 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) unsigned int stage; for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) { - const struct gl_shader_compiler_options *options = - &ctx->Const.ShaderCompilerOptions[stage]; struct gl_shader *shader = shProg->_LinkedShaders[stage]; if (!shader) @@ -143,79 +239,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog); - /* Temporary memory context for any new IR. */ - void *mem_ctx = ralloc_context(NULL); - - ralloc_adopt(mem_ctx, shader->ir); - - b
[Mesa-dev] [PATCH 02/12] nir: Fix #include guards in shader_enums.h.
This header was originally going to be called pipeline.h, but it got renamed at the last minute. Make the include guards match. Signed-off-by: Kenneth Graunke --- src/glsl/shader_enums.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h index 0e08bd3..7f59fdc 100644 --- a/src/glsl/shader_enums.h +++ b/src/glsl/shader_enums.h @@ -23,8 +23,8 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef PIPELINE_H -#define PIPELINE_H +#ifndef SHADER_ENUMS_H +#define SHADER_ENUMS_H /** * Bitflags for system values. @@ -167,4 +167,4 @@ enum glsl_interp_qualifier }; -#endif /* PIPELINE_H */ +#endif /* SHADER_ENUMS_H */ -- 2.3.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 03/12] nir: Move gl_shader_stage enum from mtypes.h to shader_enums.h.
I want to use this in some code that doesn't currently include mtypes.h. It seems like a better place for it anyway. Signed-off-by: Kenneth Graunke --- src/glsl/nir/nir.h | 1 + src/glsl/shader_enums.h | 17 + src/mesa/main/mtypes.h | 19 --- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index f9ca0f7..17a9354 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -34,6 +34,7 @@ #include "util/set.h" #include "util/bitset.h" #include "nir_types.h" +#include "glsl/shader_enums.h" #include #include "nir_opcodes.h" diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h index 7f59fdc..79e0f6b 100644 --- a/src/glsl/shader_enums.h +++ b/src/glsl/shader_enums.h @@ -27,6 +27,23 @@ #define SHADER_ENUMS_H /** + * Shader stages. Note that these will become 5 with tessellation. + * + * The order must match how shaders are ordered in the pipeline. + * The GLSL linker assumes that if ihttp://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 11/12] i965: Create NIR during LinkShader() and ProgramStringNotify().
Previously, we translated into NIR and did all the optimizations and lowering as part of running fs_visitor. This meant that we did all of that work twice for fragment shaders - once for SIMD8, and again for SIMD16. We also had to redo it every time we hit a state based recompile. We now generate NIR once at link time. ARB programs don't have linking, so we instead generate it at ProgramStringNotify time. Mesa's fixed function vertex program handling doesn't bother to inform the driver about new programs at all (which is rather mean), so we generate NIR at the last minute, if it hasn't happened already. shader-db runs ~9.4% faster on my i7-5600U, with a release build. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 174 +-- src/mesa/drivers/dri/i965/brw_nir.c| 213 + src/mesa/drivers/dri/i965/brw_nir.h| 6 + src/mesa/drivers/dri/i965/brw_program.c| 7 + src/mesa/drivers/dri/i965/brw_shader.cpp | 6 + src/mesa/drivers/dri/i965/brw_vec4.cpp | 17 ++- src/mesa/main/mtypes.h | 2 + src/mesa/program/program.c | 5 + 9 files changed, 255 insertions(+), 176 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_nir.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 498d5a7..6d4659f 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -77,6 +77,7 @@ i965_FILES = \ brw_misc_state.c \ brw_multisample_state.h \ brw_nir.h \ + brw_nir.c \ brw_nir_analyze_boolean_resolves.c \ brw_object_purgeable.c \ brw_packed_float.c \ diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 034b79a..ccffd5d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -28,175 +28,10 @@ #include "brw_fs.h" #include "brw_nir.h" -static void -nir_optimize(nir_shader *nir) -{ - bool progress; - do { - progress = false; - 
nir_lower_vars_to_ssa(nir); - nir_validate_shader(nir); - nir_lower_alu_to_scalar(nir); - nir_validate_shader(nir); - progress |= nir_copy_prop(nir); - nir_validate_shader(nir); - nir_lower_phis_to_scalar(nir); - nir_validate_shader(nir); - progress |= nir_copy_prop(nir); - nir_validate_shader(nir); - progress |= nir_opt_dce(nir); - nir_validate_shader(nir); - progress |= nir_opt_cse(nir); - nir_validate_shader(nir); - progress |= nir_opt_peephole_select(nir); - nir_validate_shader(nir); - progress |= nir_opt_algebraic(nir); - nir_validate_shader(nir); - progress |= nir_opt_constant_folding(nir); - nir_validate_shader(nir); - progress |= nir_opt_remove_phis(nir); - nir_validate_shader(nir); - } while (progress); -} - -static bool -count_nir_instrs_in_block(nir_block *block, void *state) -{ - int *count = (int *) state; - nir_foreach_instr(block, instr) { - *count = *count + 1; - } - return true; -} - -static int -count_nir_instrs(nir_shader *nir) -{ - int count = 0; - nir_foreach_overload(nir, overload) { - if (!overload->impl) - continue; - nir_foreach_block(overload->impl, count_nir_instrs_in_block, &count); - } - return count; -} - void fs_visitor::emit_nir_code() { - const nir_shader_compiler_options *options = - ctx->Const.ShaderCompilerOptions[stage].NirOptions; - - nir_shader *nir; - /* First, lower the GLSL IR or Mesa IR to NIR */ - if (shader_prog) { - nir = glsl_to_nir(&shader->base, options); - } else { - nir = prog_to_nir(prog, options); - nir_convert_to_ssa(nir); /* turn registers into SSA */ - } - nir_validate_shader(nir); - - nir_lower_global_vars_to_local(nir); - nir_validate_shader(nir); - - nir_lower_tex_projector(nir); - nir_validate_shader(nir); - - nir_normalize_cubemap_coords(nir); - nir_validate_shader(nir); - - nir_split_var_copies(nir); - nir_validate_shader(nir); - - nir_optimize(nir); - - /* Lower a bunch of stuff */ - nir_lower_var_copies(nir); - nir_validate_shader(nir); - - /* Get rid of split copies */ - nir_optimize(nir); - - if 
(shader_prog) { - nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms, - &nir->num_direct_uniforms, - &nir->num_uniforms); - } else { - /* ARB programs generally create a giant array of "uniform" data, and allow - * indirect addressing without any boundaries. In the absence of bounds - * analysis, it's all or nothing. num_direct_uniforms is only useful when - * we have some direct and some indirect access; it doesn't matter
[Mesa-dev] [PATCH 09/12] i965: Move lower_output_reads to brw_link_shader().
This makes it so emit_nir_code() doesn't modify the GLSL IR. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 1 - src/mesa/drivers/dri/i965/brw_shader.cpp | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 7c56290..145a447 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -90,7 +90,6 @@ fs_visitor::emit_nir_code() nir_shader *nir; /* First, lower the GLSL IR or Mesa IR to NIR */ if (shader_prog) { - lower_output_reads(shader->base.ir); nir = glsl_to_nir(&shader->base, options); } else { nir = prog_to_nir(prog, options); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index bf9aceb..8700077 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -199,6 +199,9 @@ process_glsl_ir(struct brw_context *brw, options, ctx->Const.NativeIntegers) || progress; } while (progress); + if (options->NirOptions != NULL) + lower_output_reads(shader->ir); + validate_ir_tree(shader->ir); /* Now that we've finished altering the linked IR, reparent any live IR back -- 2.3.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 00/12] i965: Generate NIR at link time
Hello, This series makes i965 generate NIR at link time (or ProgramStringNotify time for ARB programs), rather than on each FS/VS compile. This means we only do it once, rather than for SIMD8 and again for SIMD16 programs. It also means we can avoid it when doing state based recompiles. It speeds up shader-db on my Broadwell by about 9.4%. It also adds INTEL_DEBUG=ann support, now that we keep a persistent copy of the NIR program around for the annotations to refer to. Available in the 'nir-link' branch of ~kwg/mesa. --Ken ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 04/12] nir: Make nir_lower_samplers take a gl_shader_stage, not a gl_program *.
We don't actually need a gl_program struct. We only used it to translate prog->Target (i.e. GL_VERTEX_PROGRAM) to the gl_shader_stage (i.e. MESA_SHADER_VERTEX). We may as well just pass that. Signed-off-by: Kenneth Graunke --- src/glsl/nir/nir.h | 2 +- src/glsl/nir/nir_lower_samplers.cpp | 26 -- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 +- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 17a9354..679911c 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1612,7 +1612,7 @@ void nir_lower_phis_to_scalar(nir_shader *shader); void nir_lower_samplers(nir_shader *shader, struct gl_shader_program *shader_program, -struct gl_program *prog); +gl_shader_stage stage); void nir_lower_system_values(nir_shader *shader); void nir_lower_tex_projector(nir_shader *shader); diff --git a/src/glsl/nir/nir_lower_samplers.cpp b/src/glsl/nir/nir_lower_samplers.cpp index 1e509a9..7a7cf85 100644 --- a/src/glsl/nir/nir_lower_samplers.cpp +++ b/src/glsl/nir/nir_lower_samplers.cpp @@ -36,11 +36,9 @@ extern "C" { } static unsigned -get_sampler_index(struct gl_shader_program *shader_program, const char *name, - const struct gl_program *prog) +get_sampler_index(struct gl_shader_program *shader_program, + gl_shader_stage stage, const char *name) { - GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target); - unsigned location; if (!shader_program->UniformHash->get(location, name)) { linker_error(shader_program, @@ -48,7 +46,7 @@ get_sampler_index(struct gl_shader_program *shader_program, const char *name, return 0; } - if (!shader_program->UniformStorage[location].sampler[shader].active) { + if (!shader_program->UniformStorage[location].sampler[stage].active) { assert(0 && "cannot return a sampler"); linker_error(shader_program, "cannot return a sampler named %s, because it is not " @@ -57,12 +55,12 @@ get_sampler_index(struct gl_shader_program *shader_program, const char *name, return 0; } - return 
shader_program->UniformStorage[location].sampler[shader].index; + return shader_program->UniformStorage[location].sampler[stage].index; } static void lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, - const struct gl_program *prog, void *mem_ctx) + gl_shader_stage stage, void *mem_ctx) { if (instr->sampler == NULL) return; @@ -133,7 +131,7 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, } } - instr->sampler_index += get_sampler_index(shader_program, name, prog); + instr->sampler_index += get_sampler_index(shader_program, stage, name); instr->sampler = NULL; } @@ -141,7 +139,7 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program, typedef struct { void *mem_ctx; struct gl_shader_program *shader_program; - struct gl_program *prog; + gl_shader_stage stage; } lower_state; static bool @@ -152,7 +150,7 @@ lower_block_cb(nir_block *block, void *_state) nir_foreach_instr(block, instr) { if (instr->type == nir_instr_type_tex) { nir_tex_instr *tex_instr = nir_instr_as_tex(instr); - lower_sampler(tex_instr, state->shader_program, state->prog, + lower_sampler(tex_instr, state->shader_program, state->stage, state->mem_ctx); } } @@ -162,23 +160,23 @@ lower_block_cb(nir_block *block, void *_state) static void lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program, - struct gl_program *prog) + gl_shader_stage stage) { lower_state state; state.mem_ctx = ralloc_parent(impl); state.shader_program = shader_program; - state.prog = prog; + state.stage = stage; nir_foreach_block(impl, lower_block_cb, &state); } extern "C" void nir_lower_samplers(nir_shader *shader, struct gl_shader_program *shader_program, - struct gl_program *prog) + gl_shader_stage stage) { nir_foreach_overload(shader, overload) { if (overload->impl) - lower_impl(overload->impl, shader_program, prog); + lower_impl(overload->impl, shader_program, stage); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index a874337..7c56290 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -141,7 +141,7 @@ fs_visitor::emit_nir_code() nir_validate_shader(nir); if (shader_prog) { - nir_lower_samplers(nir, shader_prog, shader->base.Program); + nir_lower_samplers(nir, shader_prog, stage); nir_validate_shader(nir); } -- 2.3.5 _
Re: [Mesa-dev] DMA_BUF render targets disabled for intel
On 07.04.2015 21:54, Chad Versace wrote: On Thu 02 Apr 2015, Axel Davy wrote: Hi, you may be interested in looking at this related bug report: https://bugs.freedesktop.org/show_bug.cgi?id=87452#c5 Yours, Axel Davy On 02/04/2015 11:58, Volker Vogelhuber wrote : We currently want to stream OpenGL output to an FPGA that does not provide a SG controller and should manage the transfers from the CPU memory to its own hardware. For that reason we want to have the OpenGL driver (intel baytrail) to render at a specific memory area within the CPU system. Render to texture as it is possible e.g. on the PowerVR 530 seems not to be possible, as GL_TEXTURE_EXTERNAL_OES is not valid for glFramebufferTexture2D and in contrast to the PowerVR OpenGL implementation, Mesa seems to prohibit the use of GL_TEXTURE_2D for textures created by glEGLImageTargetTexture2DOES (there is a check within Mesa where glEGLImageTargetTexture2DOES's target has to be equal to the target of the texture => GL_TEXTURE_EXTERNAL_OES != GL_TEXTURE_2D). So the only possible way to render to an EGLImage with memory allocated by myself seems to be the use of glEGLImageTargetRenderbufferStorageOES and bind this render buffer using glFramebufferRenderbuffer to the FBO. But for some reason, it seems to be forbidden to use an EGLImage imported from a dmabuf as render buffer. At least within src/mesa/drivers/dri/i965/intel_fbo.c there is a check: /* Buffers originating from outside are for read-only. */ if (image->dma_buf_imported) { _mesa_error(ctx, GL_INVALID_OPERATION, "glEGLImageTargetRenderbufferStorage(dma buffers are read-only)"); return; } This prevents me from doing what I wanted to do and I googled a bit. I found someone else who just removed that check: https://github.com/kalyankondapally/Chromium-OzoneGBM/blob/master/0010-i965-remove-read-only-restriction-of-imported-buffer.patch That patch isn't safe for general renderbuffer usage... details below.
(As an aside, Chrome OS also has a similar patch in their Mesa tree. But it's safe for Chrome OS, at least for now). Why it's safe only for ChromeOS? Do you mean it's not safe for X11 or is there something else, I should be aware of. Actually we're not using X11 ourselves, but only raw DRM/KMS infrastructure. and after I did so myself, it just worked as I wanted it to work. I only wonder why this limitation has been added. Is it just for some pedantic reasons or is there any good reason why EGLImages imported from dmabuf descriptors shouldn't be used for render targets? There is a very good reason. It is not pedantic. And me and Tapani (CC'd) are working on enabling this. See [https://bugs.freedesktop.org/show_bug.cgi?id=87452#c7] for my work-in-progress patches. The reason is that, on Intel chipsets Ivybridge and newer, the i965 driver often expects each color buffer to have an auxiliary metadata buffer that holds compression information. If the aux buffer does not exist, i965 will create it. If the metadata buffer and the real color buffer become unsynchronized (which is *very* likely when using a dma_buf as renderbuffer storage), you will get corrupt rendering. If you haven't got corrupt rendering, it's solely due to luck (and that luck is proportional to the density of cleared pixels that exist in the buffer). Based on your patches I had a quick look in the source code for creating MCS buffers, but without knowing details about the intel GPUs I doubt it makes much sense for me to dive too deep into it. Therefore, i965 needs to be taught to disable aux buffers for dma_buf-backed storage. Before that happens, you risk corrupted images if you render to a dma_buf-backed renderbuffer. If you apply Kalyan's patch on top of my (untested) patches, then that should safely enable what you're doing with the FPGA. (There may still be bugs with EGLImage orphaning semantics, but that likely won't affect you). Thanks again. 
Are there any forecasts when it will be available upstream? Regards, Volker ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/12] i965: Create NIR during LinkShader() and ProgramStringNotify().
On 08/04/15 10:06, Kenneth Graunke wrote: Previously, we translated into NIR and did all the optimizations and lowering as part of running fs_visitor. This meant that we did all of that work twice for fragment shaders - once for SIMD8, and again for SIMD16. We also had to redo it every time we hit a state based recompile. We now generate NIR once at link time. ARB programs don't have linking, so we instead generate it at ProgramStringNotify time. Mesa's fixed function vertex program handling doesn't bother to inform the driver about new programs at all (which is rather mean), so we generate NIR at the last minute, if it hasn't happened already. shader-db runs ~9.4% faster on my i7-5600U, with a release build. Nice speed improvement but wouldn't it affect negatively programs using SSO to recombine shaders at run time? Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 174 +-- src/mesa/drivers/dri/i965/brw_nir.c| 213 + src/mesa/drivers/dri/i965/brw_nir.h| 6 + src/mesa/drivers/dri/i965/brw_program.c| 7 + src/mesa/drivers/dri/i965/brw_shader.cpp | 6 + src/mesa/drivers/dri/i965/brw_vec4.cpp | 17 ++- src/mesa/main/mtypes.h | 2 + src/mesa/program/program.c | 5 + 9 files changed, 255 insertions(+), 176 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_nir.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 498d5a7..6d4659f 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -77,6 +77,7 @@ i965_FILES = \ brw_misc_state.c \ brw_multisample_state.h \ brw_nir.h \ + brw_nir.c \ brw_nir_analyze_boolean_resolves.c \ brw_object_purgeable.c \ brw_packed_float.c \ diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 034b79a..ccffd5d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ 
-28,175 +28,10 @@ #include "brw_fs.h" #include "brw_nir.h" -static void -nir_optimize(nir_shader *nir) -{ - bool progress; - do { - progress = false; - nir_lower_vars_to_ssa(nir); - nir_validate_shader(nir); - nir_lower_alu_to_scalar(nir); - nir_validate_shader(nir); - progress |= nir_copy_prop(nir); - nir_validate_shader(nir); - nir_lower_phis_to_scalar(nir); - nir_validate_shader(nir); - progress |= nir_copy_prop(nir); - nir_validate_shader(nir); - progress |= nir_opt_dce(nir); - nir_validate_shader(nir); - progress |= nir_opt_cse(nir); - nir_validate_shader(nir); - progress |= nir_opt_peephole_select(nir); - nir_validate_shader(nir); - progress |= nir_opt_algebraic(nir); - nir_validate_shader(nir); - progress |= nir_opt_constant_folding(nir); - nir_validate_shader(nir); - progress |= nir_opt_remove_phis(nir); - nir_validate_shader(nir); - } while (progress); -} - -static bool -count_nir_instrs_in_block(nir_block *block, void *state) -{ - int *count = (int *) state; - nir_foreach_instr(block, instr) { - *count = *count + 1; - } - return true; -} - -static int -count_nir_instrs(nir_shader *nir) -{ - int count = 0; - nir_foreach_overload(nir, overload) { - if (!overload->impl) - continue; - nir_foreach_block(overload->impl, count_nir_instrs_in_block, &count); - } - return count; -} - void fs_visitor::emit_nir_code() { - const nir_shader_compiler_options *options = - ctx->Const.ShaderCompilerOptions[stage].NirOptions; - - nir_shader *nir; - /* First, lower the GLSL IR or Mesa IR to NIR */ - if (shader_prog) { - nir = glsl_to_nir(&shader->base, options); - } else { - nir = prog_to_nir(prog, options); - nir_convert_to_ssa(nir); /* turn registers into SSA */ - } - nir_validate_shader(nir); - - nir_lower_global_vars_to_local(nir); - nir_validate_shader(nir); - - nir_lower_tex_projector(nir); - nir_validate_shader(nir); - - nir_normalize_cubemap_coords(nir); - nir_validate_shader(nir); - - nir_split_var_copies(nir); - nir_validate_shader(nir); - - nir_optimize(nir); - - 
/* Lower a bunch of stuff */ - nir_lower_var_copies(nir); - nir_validate_shader(nir); - - /* Get rid of split copies */ - nir_optimize(nir); - - if (shader_prog) { - nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms, - &nir->num_direct_uniforms, - &nir->num_uniforms); - } else { - /* ARB programs generally create a giant array of "uniform" data, and allow - * indirect addressing without any boundaries. In th
[Mesa-dev] [PATCH] r600g/sb: Skip empty ALU clause while scheduling
Fixes assert triggered by ext_transform_feedback-intervening-read output use_gs piglit test. Signed-off-by: Glenn Kennard --- src/gallium/drivers/r600/sb/sb_sched.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp b/src/gallium/drivers/r600/sb/sb_sched.cpp index 4248a3f..2e38a62 100644 --- a/src/gallium/drivers/r600/sb/sb_sched.cpp +++ b/src/gallium/drivers/r600/sb/sb_sched.cpp @@ -825,6 +825,9 @@ void post_scheduler::init_regmap() { void post_scheduler::process_alu(container_node *c) { + if (c->empty()) + return; + ucm.clear(); alu.reset(); -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] glsl: Allow any sort of sampler array indexing with GLSL ES < 3.00
On 04/08/2015 01:36 AM, Ian Romanick wrote: On 04/07/2015 03:22 AM, Francisco Jerez wrote: Tapani Pälli writes: From: Kalyan Kondapally Dynamic indexing of sampler arrays is prohibited by GLSL ES 3.00. Earlier versions allow 'constant-index-expression' indexing, where index can contain a loop induction variable. Patch allows dynamic indexing for sampler arrays when GLSL ES < 3.00. This change makes 'sampler-array-index.frag' parser test in Piglit pass + fishgl.com works when running Chrome on OpenGL ES 2.0 backend. v2: small change and some more commit message (Tapani) Signed-off-by: Kalyan Kondapally Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=84225 Looks good, but did you check what happens now if the shader uses actual variable indexing (i.e. which lowering cannot turn into a constant) on an implementation that doesn't support it? Hopefully no crashes or hangs? I think we should add a post-link check that no dynamic indexing remains after all the optimizations are complete. The intention of the ES2 language was to allow cases where the dynamic indexing could be optimized away. This was redacted in ES3 because each optimizer was differently capable, so a shader that worked on one driver/GPU might fail on another... even from the same vendor. Adding the post-link check should prevent the problems that Curro (rightly) worried about, and it should still allow the WebGL demo to work. I was not sure if this is worth the effort since this path has been active for desktop GLSL < 1.30 for quite a long time, but I can take a look at adding such a check. --- src/glsl/ast_array_index.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp index ecef651..b2609b6 100644 --- a/src/glsl/ast_array_index.cpp +++ b/src/glsl/ast_array_index.cpp @@ -226,7 +226,7 @@ _mesa_ast_array_index_to_hir(void *mem_ctx, * dynamically uniform expression is undefined. 
*/ if (array->type->element_type()->is_sampler()) { -if (!state->is_version(130, 100)) { +if (!state->is_version(130, 300)) { if (state->es_shader) { _mesa_glsl_warning(&loc, state, "sampler arrays indexed with non-constant " It looks like this is what e3ded7f should have made this code. Looking at the rest of the surrounding code, I don't think this is quite right... at the very least, it's not easy to follow. You can blame me and Paul for that. I think this is correct and easier to follow: if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) { if (state->is_version(130, 300)) _mesa_glsl_error(&loc, state, "sampler arrays indexed with non-constant " "expressions are forbidden in GLSL %s " "and later" state->es_shader ? "ES 3.00" : "1.30"); else if (state->es_shader) _mesa_glsl_warning(&loc, state, "sampler arrays indexed with non-constant " "expressions are optional in %s and will " "be forbidden in GLSL ES 3.00 and later" state->version_string()); else _mesa_glsl_warning(&loc, state, "sampler arrays indexed with non-constant " "expressions will be forbidden in GLSL " "1.30 and later"); } OK, thanks! -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] st_TexSubImage: unaligned memcpy performance
Hi, (sorry for possible double-posting, I sent this earlier but before subscribing to the mesa-dev list) I have an issue where st_TexSubImage causes very high CPU load in __memcpy_sse2_unaligned (Mesa 10.1.3, Xorg 1.15.1, radeon driver, HD 7870). Any obvious causes / tips for this? e.g. align textures or use different format/type? I've tried using GL_BGRA/GL_UNSIGNED_BYTE and GL_BGRA/GL_UNSIGNED_INT_8_8_8_8_REV. __memcpy_sse2_unaligned () at ../sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S:85 85../sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S: No such file or directory. (gdb) bt #0 __memcpy_sse2_unaligned () at ../sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S:85 #1 0x7fffb572f154 in memcpy (__len=7680, __src=, __dest=0x7fff5835f800) at /usr/include/x86_64-linux-gnu/bits/string3.h:51 #2 st_TexSubImage (ctx=0x1b91420, dims=, texImage=0x1f81710, xoffset=0, yoffset=0, zoffset=0, width=1920, height=1080, depth=1, format=32993, type=5121, pixels=0xdacf90, unpack=0x1bad590) at ../../../../src/mesa/state_tracker/st_cb_texture.c:752 #3 0x7fffb56c283d in texsubimage (ctx=0x1b91420, dims=dims@entry=2, target=3553, level=0, xoffset=0, yoffset=0, zoffset=zoffset@entry=0, width=1920, height=1080, depth=depth@entry=1, format=format@entry=32993, type=type@entry=5121, pixels=pixels@entry=0xdacf90) at ../../../../src/mesa/main/teximage.c:3445 #4 0x7fffb56c659c in _mesa_TexSubImage2D (target=, level=, xoffset=, yoffset=, width=, height=, format=32993, type=5121, pixels=0xdacf90) at ../../../../src/mesa/main/teximage.c:3483 #5 0x7346191a in ?? () from /opt/build/Qt/5.4/gcc_64/lib/libQt5Gui.so.5 #6 0x7345e6ab in ?? 
() from /opt/build/Qt/5.4/gcc_64/lib/libQt5Gui.so.5 #7 0x7345ea32 in QOpenGLTexture::setData(int, QOpenGLTexture::PixelFormat, QOpenGLTexture::PixelType, void*, QOpenGLPixelTransferOptions const*) () from /opt/build/Qt/5.4/gcc_64/lib/libQt5Gui.so.5 thanks for any help, - Vasilis ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] st_TexSubImage: unaligned memcpy performance
Hi, On 8 April 2015 at 10:57, Vasilis Liaskovitis wrote: > I have an issue where st_TexSubImage causes very high CPU load in > __memcpy_sse2_unaligned (Mesa 10.1.3, Xorg 1.15.1, radeon driver, HD 7870). > > Any obvious causes / tips for this? e.g. align textures or use different > format/type? I 've tried using GL_BGRA/GL_UNSIGNED_BYTE and > GL_BGRA/GL_UNSIGNED_INT_8_8_8_8_REV > > __memcpy_sse2_unaligned () at > ../sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S:85 > 85../sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S: No such file or > directory. > (gdb) bt > #0 __memcpy_sse2_unaligned () at > ../sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S:85 > #1 0x7fffb572f154 in memcpy (__len=7680, __src=, > __dest=0x7fff5835f800) at /usr/include/x86_64-linux-gnu/bits/string3.h:51 > #2 st_TexSubImage (ctx=0x1b91420, dims=, texImage=0x1f81710, > xoffset=0, yoffset=0, zoffset=0, width=1920, height=1080, depth=1, > format=32993, type=5121, pixels=0xdacf90, unpack=0x1bad590) > at ../../../../src/mesa/state_tracker/st_cb_texture.c:752 Your source (0xdacf90) is only aligned to a 16-byte boundary, not 32. This will cause issues particularly on ARM, where natural alignment is required (i.e. 32-byte load/stores must be on 32-byte boundaries). By contrast, the destination is already aligned to a 128-byte boundary. So fixing the caller, rather than Mesa, should take care of the problem. Cheers, Daniel ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Fix automatic indentation mode for recent emacs, use fewer columns in .git
It seems a bit strange that this has stopped working for you. If you specify a mode in the .dir-locals.el file then it's supposed to set the variable for any files with that mode or any modes inherited from that mode. The C and C++ modes both inherit from prog-mode, as well as a bunch of other ones such as Python and lisp files. If you are using a non-standard mode for C files it would be surprising if it doesn't also inherit from prog-mode. I have just tested this with emacs -q (to prevent it from loading my personal config) on Emacs 24.3.1 and it does work as is. I don't think the patch would break anything for me since you explicitly set the fill-column back to 70 for commit messages so I don't care enough to complain if you want to commit it anyway, but it does seem like something fishy is going on and the reasoning in the commit message doesn't add up. Regards, - Neil Carl Worth writes: > I recently noticed (after upgrading to emacs 24?) that I was no longer > getting automatic C-style settings in emacs like I was accustomed to > getting. That is, I was now getting a default indentation of 8 and > indentation with tabs instead of spaces. > > It appears that the .dir-locals.el file is no longer taking > effect. Presumably, emacs was previously using "prog-mode" for C and > C++ source files but is now using a mode with some other name? > > I didn't chase down the name of the current mode, but just using "nil" > makes these variables get set on all files, (which should be mostly > harmless), and should be compatible with both old and new emacs. > > I did verify that the later change in this file (to indent with tabs > when in makefile-mode) still takes precendence as desired. > > While editing these files, I've also set things up to use a smaller > value for fill-column when editing a file within the ".git" > directory. This will help avoid commit messages getting wrapped when > "git log" adds some extra indentation. 
> > Note: If this change causes .dir-locals.el to take effect for someone > when it never had before, then emacs may prompt about the potentially > "unsafe" eval block here. User can reply to that prompt with "!" to > permanently whitelist this particular eval block as safe so that > prompt will not be seen again in the future. > --- > .dir-locals.el| 4 ++-- > src/gallium/drivers/freedreno/.dir-locals.el | 2 +- > src/gallium/drivers/r600/.dir-locals.el | 2 +- > src/gallium/drivers/radeon/.dir-locals.el | 2 +- > src/gallium/drivers/radeonsi/.dir-locals.el | 2 +- > src/gallium/drivers/vc4/.dir-locals.el| 2 +- > src/gallium/drivers/vc4/kernel/.dir-locals.el | 2 +- > src/gallium/winsys/radeon/.dir-locals.el | 2 +- > src/mesa/drivers/dri/nouveau/.dir-locals.el | 2 +- > 9 files changed, 10 insertions(+), 10 deletions(-) > > diff --git a/.dir-locals.el b/.dir-locals.el > index d95eb48..f44d964 100644 > --- a/.dir-locals.el > +++ b/.dir-locals.el > @@ -1,12 +1,12 @@ > -((prog-mode > +((nil >(indent-tabs-mode . nil) >(tab-width . 8) >(c-basic-offset . 3) >(c-file-style . "stroustrup") > - (fill-column . 78) >(eval . (progn > (c-set-offset 'innamespace '0) > (c-set-offset 'inline-open '0))) >) > + (".git" (nil (fill-column . 70))) > (makefile-mode (indent-tabs-mode . t)) > ) > diff --git a/src/gallium/drivers/freedreno/.dir-locals.el > b/src/gallium/drivers/freedreno/.dir-locals.el > index aa20d49..c26578b 100644 > --- a/src/gallium/drivers/freedreno/.dir-locals.el > +++ b/src/gallium/drivers/freedreno/.dir-locals.el > @@ -1,4 +1,4 @@ > -((prog-mode > +((nil >(indent-tabs-mode . true) >(tab-width . 4) >(c-basic-offset . 4) > diff --git a/src/gallium/drivers/r600/.dir-locals.el > b/src/gallium/drivers/r600/.dir-locals.el > index 4e35c12..8be6a30 100644 > --- a/src/gallium/drivers/r600/.dir-locals.el > +++ b/src/gallium/drivers/r600/.dir-locals.el > @@ -1,4 +1,4 @@ > -((prog-mode > +((nil >(indent-tabs-mode . true) >(tab-width . 8) >(c-basic-offset . 
8) > diff --git a/src/gallium/drivers/radeon/.dir-locals.el > b/src/gallium/drivers/radeon/.dir-locals.el > index 4e35c12..8be6a30 100644 > --- a/src/gallium/drivers/radeon/.dir-locals.el > +++ b/src/gallium/drivers/radeon/.dir-locals.el > @@ -1,4 +1,4 @@ > -((prog-mode > +((nil >(indent-tabs-mode . true) >(tab-width . 8) >(c-basic-offset . 8) > diff --git a/src/gallium/drivers/radeonsi/.dir-locals.el > b/src/gallium/drivers/radeonsi/.dir-locals.el > index 4e35c12..8be6a30 100644 > --- a/src/gallium/drivers/radeonsi/.dir-locals.el > +++ b/src/gallium/drivers/radeonsi/.dir-locals.el > @@ -1,4 +1,4 @@ > -((prog-mode > +((nil >(indent-tabs-mode . true) >(tab-width . 8) >(c-basic-offset . 8) > diff --git a/src/gallium/drivers/vc4/.dir-locals.el > b/src/gallium/drivers/vc4/.dir-locals.el > index ac94242..ed10dc2 100644 > --- a/src/gallium/drivers/vc4/.dir-locals.el > +++ b/src/gallium/dri
[Mesa-dev] [PATCH] gallium/ttn: use single component address register
From: Rob Clark Only needs to be a vec1, and this helps out the later opt stages. From the shader (after opt) for fs-temp-array-mat3-index-col-row-wr goes, before: vec1 ssa_408 = imul ssa_155, ssa_1 vec4 ssa_413 = vec4 ssa_408, ssa_412.y, ssa_412.z, ssa_412.w vec4 ssa_166 = intrinsic load_uniform () () (0, 1) vec4 ssa_772 = vec4 ssa_166, ssa_166.y, ssa_166.z, ssa_166.z intrinsic store_var (ssa_772) (arr_5[ssa_413]) () vec4 ssa_416 = vec4 ssa_408, ssa_412.y, ssa_412.z, ssa_412.w vec4 ssa_178 = intrinsic load_uniform () () (1, 1) vec4 ssa_787 = vec4 ssa_178, ssa_178.y, ssa_178.z, ssa_178.z intrinsic store_var (ssa_787) (arr_5[1 + ssa_416]) () vec4 ssa_190 = intrinsic load_uniform () () (2, 1) vec4 ssa_802 = vec4 ssa_190, ssa_190.y, ssa_190.z, ssa_190.z intrinsic store_var (ssa_802) (arr_5[2 + ssa_416]) () after: vec1 ssa_408 = imul ssa_155, ssa_1 vec4 ssa_166 = intrinsic load_uniform () () (0, 1) vec4 ssa_763 = vec4 ssa_166, ssa_166.y, ssa_166.z, ssa_166.z intrinsic store_var (ssa_763) (arr_5[ssa_408]) () vec4 ssa_178 = intrinsic load_uniform () () (1, 1) vec4 ssa_778 = vec4 ssa_178, ssa_178.y, ssa_178.z, ssa_178.z intrinsic store_var (ssa_778) (arr_5[1 + ssa_408]) () vec4 ssa_190 = intrinsic load_uniform () () (2, 1) vec4 ssa_793 = vec4 ssa_190, ssa_190.y, ssa_190.z, ssa_190.z intrinsic store_var (ssa_793) (arr_5[2 + ssa_408]) () ie. it realizes the indirect is the same for all three store_var's which avoids my backend generating duplicate (mov (shl (cov))) instruction chains. 
Signed-off-by: Rob Clark --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index f4c0bad..5bd8ca0 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -148,7 +148,7 @@ ttn_emit_declaration(struct ttn_compile *c) } } else if (file == TGSI_FILE_ADDRESS) { c->addr_reg = nir_local_reg_create(b->impl); - c->addr_reg->num_components = 4; + c->addr_reg->num_components = 1; } else if (file == TGSI_FILE_SAMPLER) { /* Nothing to record for samplers. */ } else { -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] DMA_BUF render targets disabled for intel
On Wed 08 Apr 2015, Volker Vogelhuber wrote: On 07.04.2015 21:54, Chad Versace wrote: On Thu 02 Apr 2015, Axel Davy wrote: Hi, you may be interesting look at this related bug report: https://bugs.freedesktop.org/show_bug.cgi?id=87452#c5 Yours, Axel Davy On 02/04/2015 11:58, Volker Vogelhuber wrote : We currently want to stream OpenGL output to an FPGA that does not provide a SG controller and should manage the transfers from the CPU memory to it's own hardware. For that reason we want to have the OpenGL driver (intel baytrail) to render at a specific memory area within the CPU system. Render to texture as it is possible e.g. on the PowerVR 530 seems not to be possible, as GL_TEXTURE_EXTERNAL_OES is not valid for glFrameBufferTexture2D and in contrast to the PowerVR OpenGL implementation, Mesa seems to prohibit the use of GL_TEXTURE_2D for textures created by glEGLImageTargetTexture2DOES (there is a check within Mesa where glEGLImageTargetTexture2DOES's target has to be equal to the target of the texture => GL_TEXTURE_EXTERNAL_OES != GL_TEXTURE_2D). So the only possible way to render to an EGLImage with memory allocated by myself seems to be the use of glEGLImageTargetRenderbufferStorageOES and bind this render buffer using glFramebufferRenderbuffer to the FBO. But for some reason, it seems to be forbidden to use an EGLImage imported from a dmabuf as render buffer. At least within src/mesa/drivers/dri/i965/intel_fbo.c there is a check: /* Buffers originating from outside are for read-only. */ if (image->dma_buf_imported) { _mesa_error(ctx, GL_INVALID_OPERATION, "glEGLImageTargetRenderbufferStorage(dma buffers are read-only)"); return; } This prevents me from doing what I wanted to do and I googled a bit. I found someone else that just removed that check: https://github.com/kalyankondapally/Chromium-OzoneGBM/blob/master/0010-i965-remove-read-only-restriction-of-imported-buffer.patch That patch isn't safe for general renderbuffer usage... details below. 
(As an aside, Chrome OS also has a similar patch in their Mesa tree. But it's safe for Chrome OS, at least for now). Why it's safe only for ChromeOS? Do you mean it's not safe for X11 or is there something else, I should be aware of. Actually we're not using X11 ourselves, but only raw DRM/KMS infrastructure. Not X11, something else. The probability of render corruption is proportional to the density of cleared pixels. Specifically, when the MCS buffer is present, the hardware groups the color buffer into blocks of pixels. If no pixel in a given block has been rendered since the previous glClear, then the pixel data in the color buffer is undefined for that block. The real data for the cleared block lives in the MCS. So, when a non-GL consumer of the dma_buf reads the color buffer (in Chrome OS's case, the consumer is Intel's display engine), it will read undefined pixel data for that block. (If the dma_buf consumer writes directly to the dma_buf, similar issues arise). KMS-based Chrome OS renders correctly due to luck: it never calls glClear on a dma_buf-backed renderbuffer. Chrome always renders to every pixel in the buffer. and after I did so myself, it just worked as I wanted it to work. I only wonder why this limitation has been added. Is it just for some pedantic reasons or is there any good reason why EGLImages imported from dmabuf descriptors shouldn't be used for render targets? There is a very good reason. It is not pedantic. And me and Tapani (CC'd) are working on enabling this. See [https://bugs.freedesktop.org/show_bug.cgi?id=87452#c7] for my work-in-progress patches. The reason is that, on Intel chipsets Ivybridge and newer, the i965 driver often expects each color buffer to have an auxiliary metadata buffer that holds compression information. If the aux buffer does not exist, i965 will create it. 
If the metadata buffer and the real color buffer become unsynchronized (which is *very* likely when using a dma_buf as renderbuffer storage), you will get corrupt rendering. If you haven't got corrupt rendering, it's solely due to luck (and that luck is proportional to the density of cleared pixels that exist in the buffer). Based on your patches I had a quick look in the source code for creating MCS buffers, but without knowing details about the intel GPUs I doubt it makes much sense for me to dive too deep into it. Therefore, i965 needs to be taught to disable aux buffers for dma_buf-backed storage. Before that happens, you risk corrupted images if you render to a dma_buf-backed renderbuffer. If you apply Kalyan's patch on top of my (untested) patches, then that should safely enable what you're doing with the FPGA. (There may still be bugs with EGLImage orphaning semantics, but that likely won't affect you). Thanks again. Are there any forecasts when it will be available upstream? Soon. I'm going to validate the patches on an Ivybridge Chromebook today and immediately submit th
Re: [Mesa-dev] [PATCH] gallium/ttn: add support for temp arrays
Rob Clark writes: > From: Rob Clark > > Since the rest of NIR really would rather have these as variables rather > than registers, create a nir_variable per array. But rather than > completely re-arrange ttn to be variable based rather than register > based, keep the registers. In the cases where there is a matching var > for the reg, ttn_emit_instruction will append the appropriate intrinsic > to get things back from the shadow reg into the variable. > > NOTE: this doesn't quite handle TEMP[ADDR[]] when the DCL doesn't give > an array id. But those just kinda suck, and should really go away. > AFAICT we don't get those from glsl. Might be an issue for some other > state tracker. > > v2: rework to use load_var/store_var with deref chains > > Signed-off-by: Rob Clark > --- > src/gallium/auxiliary/nir/tgsi_to_nir.c | 122 > +++- > 1 file changed, 103 insertions(+), 19 deletions(-) > > diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c > b/src/gallium/auxiliary/nir/tgsi_to_nir.c > index da935a4..f4c0bad 100644 > --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c > +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c > @@ -44,6 +44,7 @@ > struct ttn_reg_info { > /** nir register containing this TGSI index. */ > nir_register *reg; > + nir_variable *var; > /** Offset (in vec4s) from the start of var for this TGSI index. */ > int offset; > }; > @@ -121,22 +122,29 @@ ttn_emit_declaration(struct ttn_compile *c) > > if (file == TGSI_FILE_TEMPORARY) { >nir_register *reg; > - if (c->scan->indirect_files & (1 << file)) { > + nir_variable *var = NULL; > + > + if (decl->Declaration.Array) { > + /* for arrays, the register created just serves as a > + * shadow register. 
We append intrinsic_store_global > + * after the tgsi instruction is translated to move > + * back from the shadow register to the variable > + */ > + var = rzalloc(b->shader, nir_variable); > + > + var->type = glsl_array_type(glsl_vec4_type(), array_size); > + var->data.mode = nir_var_global; > + var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID); > + > + exec_list_push_tail(&b->shader->globals, &var->node); > + } > + > + for (i = 0; i < array_size; i++) { > reg = nir_local_reg_create(b->impl); > reg->num_components = 4; > - reg->num_array_elems = array_size; > - > - for (i = 0; i < array_size; i++) { > -c->temp_regs[decl->Range.First + i].reg = reg; > -c->temp_regs[decl->Range.First + i].offset = i; > - } > - } else { > - for (i = 0; i < array_size; i++) { > -reg = nir_local_reg_create(b->impl); > -reg->num_components = 4; > -c->temp_regs[decl->Range.First + i].reg = reg; > -c->temp_regs[decl->Range.First + i].offset = 0; > - } > + c->temp_regs[decl->Range.First + i].reg = reg; > + c->temp_regs[decl->Range.First + i].var = var; > + c->temp_regs[decl->Range.First + i].offset = i; Continuing to use array_size here doesn't make any sense to me, since if you're not handling variable array indices when generating stores into the array. So all you want is a single vec4 reg available so that you have something that our ALU op generation can do writemasked stores into, and you're picking an arbitrary one of them in ttn_get_dest(). I think this would make a ton more sense if ttn_get_dest() just returned a new vec4 local reg for the temporary, instead of having this sort-of-shadow thing. >} > } else if (file == TGSI_FILE_ADDRESS) { >c->addr_reg = nir_local_reg_create(b->impl); > @@ -245,6 +253,32 @@ ttn_emit_immediate(struct ttn_compile *c) > static nir_src * > ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register > *indirect); > > +/* generate either a constant or indirect deref chain for accessing an > + * array variable. 
> + */ > +static nir_deref_var * > +ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset, > +struct tgsi_ind_register *indirect) > +{ > + nir_builder *b = &c->build; > + nir_deref_var *deref = nir_deref_var_create(b->shader, var); > + nir_deref_array *arr = nir_deref_array_create(b->shader); > + > + arr->base_offset = offset; > + arr->deref.type = glsl_get_array_element(var->type); > + > + if (indirect) { > + arr->deref_array_type = nir_deref_array_type_indirect; > + arr->indirect = nir_src_for_reg(c->addr_reg); > + } else { > + arr->deref_array_type = nir_deref_array_type_direct; > + } > + > + deref->deref.child = &arr->deref; > + > + return deref; > +} > + > static nir_src > ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned > index, > struct tgsi_ind_register *indirect) > @@ -256,10 +290,25 @@ ttn_src_for_file_and_index(struct ttn_com
Re: [Mesa-dev] [PATCH] Fix automatic indentation mode for recent emacs, use fewer columns in .git
On Wed, Apr 08 2015, Neil Roberts wrote: > It seems a bit strange that this has stopped working for you. Yes. I don't understand exactly what's going on. > mode. The C and C++ modes both inherit from prog-mode, as well as a > bunch of other ones such as Python and lisp files. That's what I guessed, (given that we have "prog-mode" in our files). I tried investigating a little bit, but didn't get too far. From an editor session editing an emacs file, (whether my standard environment or with "emacs -q"), "M-x describe-mode" says: C/l mode: Major mode for editing K&R and ANSI C code. Which looks pretty standard to me. But I don't know what the identifier for this mode would be to specify it in the .dir-locals.el file nor what modes it inherits from. > If you are using a > non-standard mode for C files it would be surprising if it doesn't also > inherit from prog-mode. I have just tested this with emacs -q (to > prevent it from loading my personal config) on Emacs 24.3.1 and it does > work as is. No non-standard mode here, (at least not intentionally). And I also verified the behavior is the same with "emacs -q". Maybe there's a Debian-specific bug that I'm hitting here? > I don't think the patch would break anything for me since you explicitly > set the fill-column back to 70 for commit messages so I don't care > enough to complain if you want to commit it anyway, but it does seem > like something fishy is going on and the reasoning in the commit message > doesn't add up. I won't disagree there. I don't know the actual root cause, but since this fixes an actual problem for me, and we haven't identified any negative side effects, I'll plan to commit this change. And if anyone can diagnose the root cause and improve .dir-locals.el further, that will be fine too. -Carl pgpD6Q3OM6D8Y.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Value Range Propagation in NIR (GSoC)
On Tue, Apr 7, 2015 at 4:52 PM, Connor Abbott wrote: > Hi Thomas, > > Thanks for submitting a proposal! Some comments/answers below. > > On Tue, Apr 7, 2015 at 3:34 PM, Thomas Helland > wrote: >> Hi, >> >> For those that don't know I've submitted a proposal for this years GSoC. >> I've proposed to implement value range propagation and loop unrolling in >> NIR. >> Since I'm no expert on compilers I've read up on some litterature: >> >> I started with "Constant propagation with conditional branches" (thanks >> Connor). >> This paper describes an algorithm, "sparse conditional constant >> propagation", >> that seems to be the defacto standard in compilers today. >> >> I also found the paper; >> "Accurate static branch prediction by value range propagation " (VRP). >> This describes a value range propagation implementation based on SCCP. >> (This also allows one to set heuristics to calculate educated guesses for >> the >> probability of a certain branch, but that's probably more than we're >> interested in.) > > Thanks for mentioning that... I had forgotten the name of that paper. > You're right in that the branch probability stuff isn't too useful for > us. Also, it raises an important issue about back-edges from phi > nodes; they present a more sophisticated method to handle it, but I > think that for now we can just force back edges to have an infinite > range unless they're constant. > >> >> There is also a GCC paper (with whatever licensing issues that may apply); >> "A propagation engine for GCC". >> They have a shared engine for doing all propagation passes. >> It handles the worklists, and the logic to traverse these. >> The implementing passes then supply callbacks to define the lattice rules. >> They reply back if the instruction was interesting or not, >> and the propagation engine basically handles the rest. >> >> Maybe that's an interesting solution? Or it might not be worth the hassle? 
>> We already have copy propagation, and with value range propagation >> we probably don't want separate constant propagation? >> (I'm hoping to write the pass so that it handles both constants and value >> ranges.) > > Yes, constant propagation probably won't be so useful once we have value > range propagation; the former is a special case of the latter. Note > that we have a nifty way of actually doing the constant folding > (nir_constant_expressions.py and nir_constant_expressions.h), which > you should still use if all the inputs are constant. When I started taking a stab at range propagation, I started by trying to extend the constant folding framework. I had a patch (http://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/nir-minmax) but it doesn't do nearly as much as I remembered. I don't know if it's practical to try and extend it or if we're better off just hand-rolling whatever we do for range handling. >> The GCC guys have used this engine to get copy propagation that propagates >> copies accross conditionals, maybe this makes such a solution more >> interesting? > > I'm not so sure how useful such a general framework will be. Constant > propagation that handles back-edges seems interesting, but I'm not > sure it's worth the time to implement something this general as a > first pass. Agreed. Let's just get it working first. >> >> Connor: I just remembered you saying something about your freedesktop >> git repo, so I poked around some and found that you have already done >> some work on VRP based on SCCP? How far did you get? > > I started on it, but then I realized that the approach I was using was > too cumbersome/complicated so I don't think what I have is too useful. > Feel free to work on it yourself, although Jason and I have discussed > it so we have some ideas of how to do it. I've written a few notes on > this below that you may find useful. 
> > - I have a branch I created while working on VRP that you'll probably > find useful: http://cgit.freedesktop.org/~cwabbott0/mesa/log/?h=nir-worklist > . The first two commits are already in master, but the last two should > be useful for implementing SCCP/VRP (although they'll need to be > rebased, obviously). > > - There's a comment in the SCCP paper (5.3, Nodes versus Edges) that > says: "An alternative way of implementing this would be to add nodes > to the > graph and then associate an ExecutableFlag with each node. An > additional node must be inserted between any node that has more than > one immediate successor and any successor node that has more than one > immediate predecessor." I think this procedure is what's usually > called "splitting critical edges"; in NIR, thanks to the structured > control flow, there are never any critical edges except for one edge > case you don't really have to care about too much (namely, an infinite > loop with one basic block) and therefore you can just use the basic > block worklist that I added in the branch mentioned above, rather than > a worklist of basic block edges as the paper describes. > > - Th
Re: [Mesa-dev] [PATCH] scons: add target gallium-osmesa
Hi Olivier Thanks for the patch ! Adding Jose to the Cc list as I believe he'll have some input on the topic. On 3 April 2015 at 15:06, wrote: > From: Olivier Pena > > --- > src/gallium/SConscript | 5 > src/gallium/state_trackers/osmesa/SConscript | 25 + > src/gallium/state_trackers/osmesa/osmesa.def | 16 +++ > src/gallium/targets/osmesa/SConscript| 41 > > 4 files changed, 87 insertions(+) > create mode 100644 src/gallium/state_trackers/osmesa/SConscript > create mode 100644 src/gallium/state_trackers/osmesa/osmesa.def > create mode 100644 src/gallium/targets/osmesa/SConscript > Can you add the three new files into the EXTRA_DIST variable in the relevant Makefile.am ? This way we can build scons gallium-osmesa from a release tarball :-) > diff --git a/src/gallium/SConscript b/src/gallium/SConscript > index 680ad92..eeb1c78 100644 > --- a/src/gallium/SConscript > +++ b/src/gallium/SConscript > @@ -60,6 +60,11 @@ SConscript([ > ]) > > if not env['embedded']: > +SConscript([ > +'state_trackers/osmesa/SConscript', > +'targets/osmesa/SConscript', > +]) > + > if env['x11']: > SConscript([ > 'state_trackers/glx/xlib/SConscript', > diff --git a/src/gallium/state_trackers/osmesa/SConscript > b/src/gallium/state_trackers/osmesa/SConscript > new file mode 100644 > index 000..fa7c968 > --- /dev/null > +++ b/src/gallium/state_trackers/osmesa/SConscript > @@ -0,0 +1,25 @@ > +import os > + > +Import('*') > + > +env = env.Clone() > + > +env.Append(CPPPATH = [ > +'#src/mapi', > +'#src/mesa', > +'.', > +]) > + > +env.AppendUnique(CPPDEFINES = [ > +'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers > +'WIN32_LEAN_AND_MEAN', # > http://msdn2.microsoft.com/en-us/library/6dwk3a1z.aspx > +]) > +if not env['gles']: > +# prevent _glapi_* from being declared __declspec(dllimport) > +env.Append(CPPDEFINES = ['_GLAPI_NO_EXPORTS']) > + Shouldn't these be used when building for windows only ? 
> +st_osmesa = env.ConvenienceLibrary( > +target ='st_osmesa', > +source = env.ParseSourceList('Makefile.sources', 'C_SOURCES'), > +) > +Export('st_osmesa') > diff --git a/src/gallium/state_trackers/osmesa/osmesa.def > b/src/gallium/state_trackers/osmesa/osmesa.def > new file mode 100644 > index 000..e2a31ab > --- /dev/null > +++ b/src/gallium/state_trackers/osmesa/osmesa.def Can we move this file next to it's only user - i.e. into targets/osmesa/ ? > @@ -0,0 +1,16 @@ > +;DESCRIPTION 'Mesa OSMesa lib for Win32' > +VERSION 4.1 > + > +EXPORTS > + OSMesaCreateContext > + OSMesaCreateContextExt > + OSMesaDestroyContext > + OSMesaMakeCurrent > + OSMesaGetCurrentContext > + OSMesaPixelStore > + OSMesaGetIntegerv > + OSMesaGetDepthBuffer > + OSMesaGetColorBuffer > + OSMesaGetProcAddress > + OSMesaColorClamp > + OSMesaPostprocess > diff --git a/src/gallium/targets/osmesa/SConscript > b/src/gallium/targets/osmesa/SConscript > new file mode 100644 > index 000..2c936cf > --- /dev/null > +++ b/src/gallium/targets/osmesa/SConscript > @@ -0,0 +1,41 @@ > +Import('*') > + > +env = env.Clone() > + > +env.Prepend(CPPPATH = [ > +'#src/mapi', > +'#src/mesa', > +#Dir('../../../mapi'), # src/mapi build path for python-generated GL API > files/headers > +]) > + > +sources = [ > +'target.c', > +] > +sources += ['#src/gallium/state_trackers/osmesa/osmesa.def'] > + Afaict this should be included only if the target is Windows. > +drivers = [] > + > +if env['llvm']: > +env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE') > +env.Append(CPPDEFINES = 'GALLIUM_TRACE') > +drivers += [llvmpipe] > +else: > +env.Append(CPPDEFINES = 'GALLIUM_SOFTPIPE') > +env.Append(CPPDEFINES = 'GALLIUM_TRACE') > +drivers += [softpipe] > + One should include softpipe unconditionally as we can switch between llvmpipe and softpipe at runtime. 
> +if env['platform'] == 'windows': > +env.AppendUnique(CPPDEFINES = [ > +'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers > +]) > +if not env['gles']: > +# prevent _glapi_* from being declared __declspec(dllimport) > +env.Append(CPPDEFINES = ['_GLAPI_NO_EXPORTS']) > + Don't think you need this if block. > +gallium_osmesa = env.SharedLibrary( > +target ='osmesa', > +source = sources, > +LIBS = drivers + st_osmesa + ws_null + glapi + mesa + gallium + > trace + glsl + mesautil + env['LIBS'], How about we move this before the SharedLibrary construct and use env.Prepend(LIBS =... like other places in mesa ? Thanks Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] radeonsi: remove bogus r600-- triple
As mentioned by Michel Dänzer for LLVM >= 3.6 we create the LLVMTargetMachine (with triple amdgcn--), as we setup the radeonsi context. For older LLVM or hardware (r600) the triple is always r600-- and is created at a later stage - radeon_llvm_compile() Cc: Michel Dänzer Signed-off-by: Emil Velikov --- src/gallium/drivers/radeonsi/si_pipe.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index e761d20..5ea8868 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -85,8 +85,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void * LLVMTargetRef r600_target; #if HAVE_LLVM >= 0x0306 const char *triple = "amdgcn--"; -#else - const char *triple = "r600--"; #endif int shader, i; -- 2.3.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH] clover: Call clBuildProgram() notification function when build completes
Hi Tom, Just a friendly reminder that this patch hasn't landed in master yet. Just making sure it doesn't fall through the cracks :-) Cheers Emil On 24 March 2015 at 19:44, Tom Stellard wrote: > Cc: 10.5 10.4 > --- > src/gallium/state_trackers/clover/api/program.cpp | 4 > 1 file changed, 4 insertions(+) > > diff --git a/src/gallium/state_trackers/clover/api/program.cpp > b/src/gallium/state_trackers/clover/api/program.cpp > index 60184ed..fcec1d7 100644 > --- a/src/gallium/state_trackers/clover/api/program.cpp > +++ b/src/gallium/state_trackers/clover/api/program.cpp > @@ -180,8 +180,12 @@ clBuildProgram(cl_program d_prog, cl_uint num_devs, > validate_build_program_common(prog, num_devs, d_devs, pfn_notify, > user_data); > > prog.build(devs, opts); > + if (pfn_notify) > + pfn_notify(d_prog, user_data); > return CL_SUCCESS; > } catch (error &e) { > + if (pfn_notify) > + pfn_notify(d_prog, user_data); > if (e.get() == CL_INVALID_COMPILER_OPTIONS) >return CL_INVALID_BUILD_OPTIONS; > if (e.get() == CL_COMPILE_PROGRAM_FAILURE) > -- > 2.0.4 > > ___ > mesa-stable mailing list > mesa-sta...@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-stable ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] [PATCH] i965: Add XRGB8888 format to intel_screen_make_configs
Hi all, Can we get a pair of eyes on this patch please ? Boyan, for the future, can you please include the CC mesa-stable line in the commit message? It will make things a bit more obvious as I'm perusing the list :-) Thanks Emil On 25 March 2015 at 11:36, Boyan Ding wrote: > Some application, such as drm backend of weston, uses XRGB config as > default. i965 doesn't provide this format, but before commit 65c8965d, > the drm platform of EGL takes ARGB as XRGB. Now that commit > 65c8965d makes EGL recognize format correctly so weston won't start > because it can't find XRGB. Add XRGB format to i965 just as > other drivers do. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89689 > Signed-off-by: Boyan Ding > --- > src/mesa/drivers/dri/i965/intel_screen.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_screen.c > b/src/mesa/drivers/dri/i965/intel_screen.c > index 3640b67..2b82c33 100644 > --- a/src/mesa/drivers/dri/i965/intel_screen.c > +++ b/src/mesa/drivers/dri/i965/intel_screen.c > @@ -1126,7 +1126,8 @@ intel_screen_make_configs(__DRIscreen *dri_screen) > { > static const mesa_format formats[] = { >MESA_FORMAT_B5G6R5_UNORM, > - MESA_FORMAT_B8G8R8A8_UNORM > + MESA_FORMAT_B8G8R8A8_UNORM, > + MESA_FORMAT_B8G8R8X8_UNORM > }; > > /* GLX_SWAP_COPY_OML is not supported due to page flipping. */ > -- > 2.3.3 > > ___ > mesa-stable mailing list > mesa-sta...@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-stable ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/12] i965: Create NIR during LinkShader() and ProgramStringNotify().
On Wed, Apr 8, 2015 at 2:25 AM, Martin Peres wrote: > On 08/04/15 10:06, Kenneth Graunke wrote: >> >> Previously, we translated into NIR and did all the optimizations and >> lowering as part of running fs_visitor. This meant that we did all of >> that work twice for fragment shaders - once for SIMD8, and again for >> SIMD16. We also had to redo it every time we hit a state based >> recompile. >> >> We now generate NIR once at link time. ARB programs don't have linking, >> so we instead generate it at ProgramStringNotify time. >> >> Mesa's fixed function vertex program handling doesn't bother to inform >> the driver about new programs at all (which is rather mean), so we >> generate NIR at the last minute, if it hasn't happened already. >> >> shader-db runs ~9.4% faster on my i7-5600U, with a release build. > > > Nice speed improvement but wouldn't it affect negatively programs using SSO > to recombine shaders at run time? No. I think with SSO we basically just delay linking until they actually use the shader. The same linking function gets called either way. 
>> >> Signed-off-by: Kenneth Graunke >> --- >> src/mesa/drivers/dri/i965/Makefile.sources | 1 + >> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 174 +-- >> src/mesa/drivers/dri/i965/brw_nir.c| 213 >> + >> src/mesa/drivers/dri/i965/brw_nir.h| 6 + >> src/mesa/drivers/dri/i965/brw_program.c| 7 + >> src/mesa/drivers/dri/i965/brw_shader.cpp | 6 + >> src/mesa/drivers/dri/i965/brw_vec4.cpp | 17 ++- >> src/mesa/main/mtypes.h | 2 + >> src/mesa/program/program.c | 5 + >> 9 files changed, 255 insertions(+), 176 deletions(-) >> create mode 100644 src/mesa/drivers/dri/i965/brw_nir.c >> >> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources >> b/src/mesa/drivers/dri/i965/Makefile.sources >> index 498d5a7..6d4659f 100644 >> --- a/src/mesa/drivers/dri/i965/Makefile.sources >> +++ b/src/mesa/drivers/dri/i965/Makefile.sources >> @@ -77,6 +77,7 @@ i965_FILES = \ >> brw_misc_state.c \ >> brw_multisample_state.h \ >> brw_nir.h \ >> + brw_nir.c \ >> brw_nir_analyze_boolean_resolves.c \ >> brw_object_purgeable.c \ >> brw_packed_float.c \ >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> index 034b79a..ccffd5d 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> @@ -28,175 +28,10 @@ >> #include "brw_fs.h" >> #include "brw_nir.h" >> -static void >> -nir_optimize(nir_shader *nir) >> -{ >> - bool progress; >> - do { >> - progress = false; >> - nir_lower_vars_to_ssa(nir); >> - nir_validate_shader(nir); >> - nir_lower_alu_to_scalar(nir); >> - nir_validate_shader(nir); >> - progress |= nir_copy_prop(nir); >> - nir_validate_shader(nir); >> - nir_lower_phis_to_scalar(nir); >> - nir_validate_shader(nir); >> - progress |= nir_copy_prop(nir); >> - nir_validate_shader(nir); >> - progress |= nir_opt_dce(nir); >> - nir_validate_shader(nir); >> - progress |= nir_opt_cse(nir); >> - nir_validate_shader(nir); >> - progress |= nir_opt_peephole_select(nir); >> - nir_validate_shader(nir); >> - 
progress |= nir_opt_algebraic(nir); >> - nir_validate_shader(nir); >> - progress |= nir_opt_constant_folding(nir); >> - nir_validate_shader(nir); >> - progress |= nir_opt_remove_phis(nir); >> - nir_validate_shader(nir); >> - } while (progress); >> -} >> - >> -static bool >> -count_nir_instrs_in_block(nir_block *block, void *state) >> -{ >> - int *count = (int *) state; >> - nir_foreach_instr(block, instr) { >> - *count = *count + 1; >> - } >> - return true; >> -} >> - >> -static int >> -count_nir_instrs(nir_shader *nir) >> -{ >> - int count = 0; >> - nir_foreach_overload(nir, overload) { >> - if (!overload->impl) >> - continue; >> - nir_foreach_block(overload->impl, count_nir_instrs_in_block, >> &count); >> - } >> - return count; >> -} >> - >> void >> fs_visitor::emit_nir_code() >> { >> - const nir_shader_compiler_options *options = >> - ctx->Const.ShaderCompilerOptions[stage].NirOptions; >> - >> - nir_shader *nir; >> - /* First, lower the GLSL IR or Mesa IR to NIR */ >> - if (shader_prog) { >> - nir = glsl_to_nir(&shader->base, options); >> - } else { >> - nir = prog_to_nir(prog, options); >> - nir_convert_to_ssa(nir); /* turn registers into SSA */ >> - } >> - nir_validate_shader(nir); >> - >> - nir_lower_global_vars_to_local(nir); >> - nir_validate_shader(nir); >> - >> - nir_lower_tex_projector(nir); >> - nir_validate_shader(nir); >> - >> - nir_normalize_cubemap_coords(nir); >> - nir_validate_shader(nir); >> - >> - nir_split_var_copies(nir); >> - nir_validate_shader(nir
Re: [Mesa-dev] [PATCH 10/12] nir: Store num_direct_uniforms in the nir_shader.
On Wed, Apr 8, 2015 at 12:06 AM, Kenneth Graunke wrote: > Storing this here is pretty sketchy - I don't know if any driver other > than i965 will want to use it. But this will make it a lot easier to > generate NIR code at link time. We'll probably rework it anyway. Yeah, it's sketchy but, honestly, storing the number of uniforms/inputs/outputs is also kinda sketchy. I can't think of anything better at the moment so let's just go with it. > Signed-off-by: Kenneth Graunke > --- > src/glsl/nir/nir.h | 3 +++ > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 5 +++-- > 2 files changed, 6 insertions(+), 2 deletions(-) > > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > index e844e4d..7d11996 100644 > --- a/src/glsl/nir/nir.h > +++ b/src/glsl/nir/nir.h > @@ -1429,6 +1429,9 @@ typedef struct nir_shader { > * access plus one > */ > unsigned num_inputs, num_uniforms, num_outputs; > + > + /** the number of uniforms that are only accessed directly */ > + unsigned num_direct_uniforms; > } nir_shader; > > #define nir_foreach_overload(shader, overload)\ > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index 145a447..034b79a 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -120,7 +120,7 @@ fs_visitor::emit_nir_code() > > if (shader_prog) { >nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms, > - &num_direct_uniforms, > + &nir->num_direct_uniforms, > &nir->num_uniforms); > } else { >/* ARB programs generally create a giant array of "uniform" data, and > allow > @@ -128,7 +128,7 @@ fs_visitor::emit_nir_code() > * analysis, it's all or nothing. num_direct_uniforms is only useful > when > * we have some direct and some indirect access; it doesn't matter > here. 
> */ > - num_direct_uniforms = 0; > + nir->num_direct_uniforms = 0; > } > nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs); > nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs); > @@ -343,6 +343,7 @@ void > fs_visitor::nir_setup_uniforms(nir_shader *shader) > { > uniforms = shader->num_uniforms; > + num_direct_uniforms = shader->num_direct_uniforms; > > /* We split the uniform register file in half. The first half is > * entirely direct uniforms. The second half is indirect. > -- > 2.3.5 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/12] i965: Generate NIR at link time
Series is Reviewed-by: Jason Ekstrand On Wed, Apr 8, 2015 at 12:06 AM, Kenneth Graunke wrote: > Hello, > > This series makes i965 generate NIR at link time (or ProgramStringNotify > time for ARB programs), rather than on each FS/VS compile. This means > we only do it once, rather than for SIMD8 and again for SIMD16 programs. > It also means we can avoid it when doing state based recompiles. > > It speeds up shader-db on my Broadwell by about 9.4%. > > It also adds INTEL_DEBUG=ann support, now that we keep a persistent copy > of the NIR program around for the annotations to refer to. > > Available in the 'nir-link' branch of ~kwg/mesa. > > --Ken > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] clover: Update the wait_count of the correct event when chaining events
Hi Tom, Ping for patch#2 for clover. Do let me know if either one is no longer applicable. Thanks Emil On 25 March 2015 at 17:43, Tom Stellard wrote: > Cc: 10.5 10.4 > --- > src/gallium/state_trackers/clover/core/event.cpp | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/state_trackers/clover/core/event.cpp > b/src/gallium/state_trackers/clover/core/event.cpp > index 58de888..9d78b48 100644 > --- a/src/gallium/state_trackers/clover/core/event.cpp > +++ b/src/gallium/state_trackers/clover/core/event.cpp > @@ -67,7 +67,7 @@ event::signalled() const { > void > event::chain(event &ev) { > if (wait_count) { > - ev.wait_count++; > + wait_count++; >_chain.push_back(ev); > } > ev.deps.push_back(*this); > -- > 2.0.4 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] clover: Update the wait_count of the correct event when chaining events
On Wed, Apr 08, 2015 at 07:41:55PM +0100, Emil Velikov wrote: > Hi Tom, > > Ping for patch#2 for clover. Do let me know if either one is no longer > applicable. > This patch was rejected, it's no longer applicable. -Tom > Thanks > Emil > > On 25 March 2015 at 17:43, Tom Stellard wrote: > > Cc: 10.5 10.4 > > --- > > src/gallium/state_trackers/clover/core/event.cpp | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/src/gallium/state_trackers/clover/core/event.cpp > > b/src/gallium/state_trackers/clover/core/event.cpp > > index 58de888..9d78b48 100644 > > --- a/src/gallium/state_trackers/clover/core/event.cpp > > +++ b/src/gallium/state_trackers/clover/core/event.cpp > > @@ -67,7 +67,7 @@ event::signalled() const { > > void > > event::chain(event &ev) { > > if (wait_count) { > > - ev.wait_count++; > > + wait_count++; > >_chain.push_back(ev); > > } > > ev.deps.push_back(*this); > > -- > > 2.0.4 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] gallivm: don't use control flow when doing indirect constant buffer lookups
Series looks good to me. Just a few suggestions inline. On 04/04/15 15:50, srol...@vmware.com wrote: From: Roland Scheidegger llvm goes crazy when doing that, using way more memory and time, though there's probably more to it - this points to a very much similar issue as fixed in 8a9f5ecdb116d0449d63f7b94efbfa8b205d826f. In any case I've seen a quite plain looking vertex shader with just ~50 simple tgsi instructions (but with a dozen or so such indirect constant buffer lookups) go from a terribly high ~440ms compile time (consuming 25MB of memory in the process) down to a still awful ~230ms and 13MB with this fix (with llvm 3.3), so there's still obvious improvements possible (but I have no clue why it's so slow...). The resulting shader is most likely also faster (certainly seemed so though I don't have any hard numbers as it may have been influenced by compile times) since generally fetching constants outside the buffer range is most likely an app error (that is we expect all indices to be valid). It is possible this fixes some mysterious vertex shader slowdowns we've seen ever since we are conforming to newer apis at least partially (the main draw loop also has similar looking conditionals which we probably could do without - if not for the fetch at least for the additional elts condition.) 
--- src/gallium/auxiliary/draw/draw_llvm.h | 2 + .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 27 +++--- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c| 95 +- src/gallium/drivers/llvmpipe/lp_scene.h| 2 + src/gallium/drivers/llvmpipe/lp_setup.c| 6 +- 5 files changed, 63 insertions(+), 69 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 9565fc6..a1983e1 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -472,6 +472,8 @@ struct draw_llvm { struct draw_gs_llvm_variant_list_item gs_variants_list; int nr_gs_variants; + + float fake_const_buf[4]; Couldn't we make fake_const_buf a mere local static const array instead? It would save memory. }; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 0dfafdc..03257d8 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -273,28 +273,35 @@ llvm_middle_end_bind_parameters(struct draw_pt_middle_end *middle) { struct llvm_middle_end *fpme = llvm_middle_end(middle); struct draw_context *draw = fpme->draw; + struct draw_llvm *llvm = fpme->llvm; unsigned i; - for (i = 0; i < Elements(fpme->llvm->jit_context.vs_constants); ++i) { + for (i = 0; i < Elements(llvm->jit_context.vs_constants); ++i) { int num_consts = draw->pt.user.vs_constants_size[i] / (sizeof(float) * 4); - fpme->llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i]; - fpme->llvm->jit_context.num_vs_constants[i] = num_consts; + llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i]; + llvm->jit_context.num_vs_constants[i] = num_consts; + if (num_consts == 0) { + llvm->jit_context.vs_constants[i] = llvm->fake_const_buf; + } } - for (i = 0; i < Elements(fpme->llvm->gs_jit_context.constants); ++i) { + for (i = 0; i < 
Elements(llvm->gs_jit_context.constants); ++i) { int num_consts = draw->pt.user.gs_constants_size[i] / (sizeof(float) * 4); - fpme->llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i]; - fpme->llvm->gs_jit_context.num_constants[i] = num_consts; + llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i]; + llvm->gs_jit_context.num_constants[i] = num_consts; + if (num_consts == 0) { + llvm->gs_jit_context.constants[i] = llvm->fake_const_buf; + } } - fpme->llvm->jit_context.planes = + llvm->jit_context.planes = (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0]; - fpme->llvm->gs_jit_context.planes = + llvm->gs_jit_context.planes = (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0]; - fpme->llvm->jit_context.viewports = draw->viewports; - fpme->llvm->gs_jit_context.viewports = draw->viewports; + llvm->jit_context.viewports = draw->viewports; + llvm->gs_jit_context.viewports = draw->viewports; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 17b68ff..5aa2846 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -944,20 +944,39 @@ gather_outputs(struct lp_build_tgsi_soa_context * bld) * with a little work. */ static LLVMValueRef -build_gather(struct lp_build_context *bld, +build_gather(struct lp_build_tgsi_context *
Re: [Mesa-dev] [PATCH] gallium/ttn: add support for temp arrays
On Wed, Apr 8, 2015 at 11:14 AM, Eric Anholt wrote: > Rob Clark writes: > >> From: Rob Clark >> >> Since the rest of NIR really would rather have these as variables rather >> than registers, create a nir_variable per array. But rather than >> completely re-arrange ttn to be variable based rather than register >> based, keep the registers. In the cases where there is a matching var >> for the reg, ttn_emit_instruction will append the appropriate intrinsic >> to get things back from the shadow reg into the variable. >> >> NOTE: this doesn't quite handle TEMP[ADDR[]] when the DCL doesn't give >> an array id. But those just kinda suck, and should really go away. >> AFAICT we don't get those from glsl. Might be an issue for some other >> state tracker. >> >> v2: rework to use load_var/store_var with deref chains >> >> Signed-off-by: Rob Clark >> --- >> src/gallium/auxiliary/nir/tgsi_to_nir.c | 122 >> +++- >> 1 file changed, 103 insertions(+), 19 deletions(-) >> >> diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c >> b/src/gallium/auxiliary/nir/tgsi_to_nir.c >> index da935a4..f4c0bad 100644 >> --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c >> +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c >> @@ -44,6 +44,7 @@ >> struct ttn_reg_info { >> /** nir register containing this TGSI index. */ >> nir_register *reg; >> + nir_variable *var; >> /** Offset (in vec4s) from the start of var for this TGSI index. */ >> int offset; >> }; >> @@ -121,22 +122,29 @@ ttn_emit_declaration(struct ttn_compile *c) >> >> if (file == TGSI_FILE_TEMPORARY) { >>nir_register *reg; >> - if (c->scan->indirect_files & (1 << file)) { >> + nir_variable *var = NULL; >> + >> + if (decl->Declaration.Array) { >> + /* for arrays, the register created just serves as a >> + * shadow register. 
We append intrinsic_store_global >> + * after the tgsi instruction is translated to move >> + * back from the shadow register to the variable >> + */ >> + var = rzalloc(b->shader, nir_variable); >> + >> + var->type = glsl_array_type(glsl_vec4_type(), array_size); >> + var->data.mode = nir_var_global; >> + var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID); >> + >> + exec_list_push_tail(&b->shader->globals, &var->node); >> + } >> + >> + for (i = 0; i < array_size; i++) { >> reg = nir_local_reg_create(b->impl); >> reg->num_components = 4; >> - reg->num_array_elems = array_size; >> - >> - for (i = 0; i < array_size; i++) { >> -c->temp_regs[decl->Range.First + i].reg = reg; >> -c->temp_regs[decl->Range.First + i].offset = i; >> - } >> - } else { >> - for (i = 0; i < array_size; i++) { >> -reg = nir_local_reg_create(b->impl); >> -reg->num_components = 4; >> -c->temp_regs[decl->Range.First + i].reg = reg; >> -c->temp_regs[decl->Range.First + i].offset = 0; >> - } >> + c->temp_regs[decl->Range.First + i].reg = reg; >> + c->temp_regs[decl->Range.First + i].var = var; >> + c->temp_regs[decl->Range.First + i].offset = i; > > Continuing to use array_size here doesn't make any sense to me, since if > you're not handling variable array indices when generating stores into > the array. So all you want is a single vec4 reg available so that you > have something that our ALU op generation can do writemasked stores > into, and you're picking an arbitrary one of them in ttn_get_dest(). > > I think this would make a ton more sense if ttn_get_dest() just returned > a new vec4 local reg for the temporary, instead of having this > sort-of-shadow thing. > so the shadow registers did make things like: DCL TEMP[0..2], ARRAY(1), LOCAL 1: MOV TEMP[1].x, IN[1]. 2: MOV TEMP[1].yz, IN[2].yxyy much easier to deal with.. I'm still thinking about how to handle that w/ the create-new-temp-register-each-time approach.. but yeah, doesn't work as well if you have indirect dst. 
BR, -R >>} >> } else if (file == TGSI_FILE_ADDRESS) { >>c->addr_reg = nir_local_reg_create(b->impl); >> @@ -245,6 +253,32 @@ ttn_emit_immediate(struct ttn_compile *c) >> static nir_src * >> ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register >> *indirect); >> >> +/* generate either a constant or indirect deref chain for accessing an >> + * array variable. >> + */ >> +static nir_deref_var * >> +ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset, >> +struct tgsi_ind_register *indirect) >> +{ >> + nir_builder *b = &c->build; >> + nir_deref_var *deref = nir_deref_var_create(b->shader, var); >> + nir_deref_array *arr = nir_deref_array_create(b->shader); >> + >> + arr->base_offset = offset; >> + arr->deref.type = glsl_get_array_element(var->type); >> + >> + if (indirect) { >> +
Re: [Mesa-dev] [PATCH 10/12] nir: Store num_direct_uniforms in the nir_shader.
On 04/08/2015 12:06 AM, Kenneth Graunke wrote: > Storing this here is pretty sketchy - I don't know if any driver other > than i965 will want to use it. But this will make it a lot easier to > generate NIR code at link time. We'll probably rework it anyway. > > Signed-off-by: Kenneth Graunke > --- > src/glsl/nir/nir.h | 3 +++ > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 5 +++-- > 2 files changed, 6 insertions(+), 2 deletions(-) > > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > index e844e4d..7d11996 100644 > --- a/src/glsl/nir/nir.h > +++ b/src/glsl/nir/nir.h > @@ -1429,6 +1429,9 @@ typedef struct nir_shader { > * access plus one > */ > unsigned num_inputs, num_uniforms, num_outputs; > + > + /** the number of uniforms that are only accessed directly */ > + unsigned num_direct_uniforms; > } nir_shader; > > #define nir_foreach_overload(shader, overload)\ > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index 145a447..034b79a 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -120,7 +120,7 @@ fs_visitor::emit_nir_code() > > if (shader_prog) { >nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms, > - &num_direct_uniforms, > + &nir->num_direct_uniforms, > &nir->num_uniforms); Why not just have nir_assign_var_locations_scalar_direct_first modify the nir_shader passed in? That seems more concise. > } else { >/* ARB programs generally create a giant array of "uniform" data, and > allow > @@ -128,7 +128,7 @@ fs_visitor::emit_nir_code() > * analysis, it's all or nothing. num_direct_uniforms is only useful > when > * we have some direct and some indirect access; it doesn't matter > here. 
> */ > - num_direct_uniforms = 0; > + nir->num_direct_uniforms = 0; > } > nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs); > nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs); > @@ -343,6 +343,7 @@ void > fs_visitor::nir_setup_uniforms(nir_shader *shader) > { > uniforms = shader->num_uniforms; > + num_direct_uniforms = shader->num_direct_uniforms; > > /* We split the uniform register file in half. The first half is > * entirely direct uniforms. The second half is indirect. > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/12] i965: Create NIR during LinkShader() and ProgramStringNotify().
On 04/08/2015 02:25 AM, Martin Peres wrote: > On 08/04/15 10:06, Kenneth Graunke wrote: >> Previously, we translated into NIR and did all the optimizations and >> lowering as part of running fs_visitor. This meant that we did all of >> that work twice for fragment shaders - once for SIMD8, and again for >> SIMD16. We also had to redo it every time we hit a state based >> recompile. >> >> We now generate NIR once at link time. ARB programs don't have linking, >> so we instead generate it at ProgramStringNotify time. >> >> Mesa's fixed function vertex program handling doesn't bother to inform >> the driver about new programs at all (which is rather mean), so we >> generate NIR at the last minute, if it hasn't happened already. >> >> shader-db runs ~9.4% faster on my i7-5600U, with a release build. > > Nice speed improvement but wouldn't it affect negatively programs using > SSO to recombine shaders at run time? Hm... that's a fair question. Does NIR do any cross-stage optimization? >> Signed-off-by: Kenneth Graunke >> --- >> src/mesa/drivers/dri/i965/Makefile.sources | 1 + >> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 174 >> +-- >> src/mesa/drivers/dri/i965/brw_nir.c| 213 >> + >> src/mesa/drivers/dri/i965/brw_nir.h| 6 + >> src/mesa/drivers/dri/i965/brw_program.c| 7 + >> src/mesa/drivers/dri/i965/brw_shader.cpp | 6 + >> src/mesa/drivers/dri/i965/brw_vec4.cpp | 17 ++- >> src/mesa/main/mtypes.h | 2 + >> src/mesa/program/program.c | 5 + >> 9 files changed, 255 insertions(+), 176 deletions(-) >> create mode 100644 src/mesa/drivers/dri/i965/brw_nir.c >> >> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources >> b/src/mesa/drivers/dri/i965/Makefile.sources >> index 498d5a7..6d4659f 100644 >> --- a/src/mesa/drivers/dri/i965/Makefile.sources >> +++ b/src/mesa/drivers/dri/i965/Makefile.sources >> @@ -77,6 +77,7 @@ i965_FILES = \ >> brw_misc_state.c \ >> brw_multisample_state.h \ >> brw_nir.h \ >> +brw_nir.c \ >> brw_nir_analyze_boolean_resolves.c \ >> brw_object_purgeable.c 
\ >> brw_packed_float.c \ >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> index 034b79a..ccffd5d 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> @@ -28,175 +28,10 @@ >> #include "brw_fs.h" >> #include "brw_nir.h" >> -static void >> -nir_optimize(nir_shader *nir) >> -{ >> - bool progress; >> - do { >> - progress = false; >> - nir_lower_vars_to_ssa(nir); >> - nir_validate_shader(nir); >> - nir_lower_alu_to_scalar(nir); >> - nir_validate_shader(nir); >> - progress |= nir_copy_prop(nir); >> - nir_validate_shader(nir); >> - nir_lower_phis_to_scalar(nir); >> - nir_validate_shader(nir); >> - progress |= nir_copy_prop(nir); >> - nir_validate_shader(nir); >> - progress |= nir_opt_dce(nir); >> - nir_validate_shader(nir); >> - progress |= nir_opt_cse(nir); >> - nir_validate_shader(nir); >> - progress |= nir_opt_peephole_select(nir); >> - nir_validate_shader(nir); >> - progress |= nir_opt_algebraic(nir); >> - nir_validate_shader(nir); >> - progress |= nir_opt_constant_folding(nir); >> - nir_validate_shader(nir); >> - progress |= nir_opt_remove_phis(nir); >> - nir_validate_shader(nir); >> - } while (progress); >> -} >> - >> -static bool >> -count_nir_instrs_in_block(nir_block *block, void *state) >> -{ >> - int *count = (int *) state; >> - nir_foreach_instr(block, instr) { >> - *count = *count + 1; >> - } >> - return true; >> -} >> - >> -static int >> -count_nir_instrs(nir_shader *nir) >> -{ >> - int count = 0; >> - nir_foreach_overload(nir, overload) { >> - if (!overload->impl) >> - continue; >> - nir_foreach_block(overload->impl, count_nir_instrs_in_block, >> &count); >> - } >> - return count; >> -} >> - >> void >> fs_visitor::emit_nir_code() >> { >> - const nir_shader_compiler_options *options = >> - ctx->Const.ShaderCompilerOptions[stage].NirOptions; >> - >> - nir_shader *nir; >> - /* First, lower the GLSL IR or Mesa IR to NIR */ >> - if (shader_prog) { >> 
- nir = glsl_to_nir(&shader->base, options); >> - } else { >> - nir = prog_to_nir(prog, options); >> - nir_convert_to_ssa(nir); /* turn registers into SSA */ >> - } >> - nir_validate_shader(nir); >> - >> - nir_lower_global_vars_to_local(nir); >> - nir_validate_shader(nir); >> - >> - nir_lower_tex_projector(nir); >> - nir_validate_shader(nir); >> - >> - nir_normalize_cubemap_coords(nir); >> - nir_validate_shader(nir); >> - >> - nir_split_var_copies(nir); >> - nir_validate_shader(nir); >> - >> - nir_optimize(nir); >> - >> - /* Lower a bunch of stuff */ >> - nir_lower_var_
Re: [Mesa-dev] [PATCH 00/12] i965: Generate NIR at link time
Patches 1 through 9 and 12 are Reviewed-by: Ian Romanick The other 3 have some comments / discussion. On 04/08/2015 12:06 AM, Kenneth Graunke wrote: > Hello, > > This series makes i965 generate NIR at link time (or ProgramStringNotify > time for ARB programs), rather than on each FS/VS compile. This means > we only do it once, rather than for SIMD8 and again for SIMD16 programs. > It also means we can avoid it when doing state based recompiles. > > It speeds up shader-db on my Broadwell by about 9.4%. > > It also adds INTEL_DEBUG=ann support, now that we keep a persistent copy > of the NIR program around for the annotations to refer to. > > Available in the 'nir-link' branch of ~kwg/mesa. > > --Ken > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] scons: add target gallium-osmesa
Besides the issue Emil mentioned, one minor request: lets call the target just "osmesa". As we don't plan to have any other "osmesa" target. Jose On 08/04/15 18:18, Emil Velikov wrote: Hi Olivier Thanks for the patch ! Adding Jose to the Cc list as I believe he'll have some input on the topic. On 3 April 2015 at 15:06, wrote: From: Olivier Pena --- src/gallium/SConscript | 5 src/gallium/state_trackers/osmesa/SConscript | 25 + src/gallium/state_trackers/osmesa/osmesa.def | 16 +++ src/gallium/targets/osmesa/SConscript| 41 4 files changed, 87 insertions(+) create mode 100644 src/gallium/state_trackers/osmesa/SConscript create mode 100644 src/gallium/state_trackers/osmesa/osmesa.def create mode 100644 src/gallium/targets/osmesa/SConscript Can you add the three new files into the EXTRA_DIST variable in the relevant Makefile.am ? This way one we can build scons gallium-osmesa from a release tarball :-) diff --git a/src/gallium/SConscript b/src/gallium/SConscript index 680ad92..eeb1c78 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -60,6 +60,11 @@ SConscript([ ]) if not env['embedded']: +SConscript([ +'state_trackers/osmesa/SConscript', +'targets/osmesa/SConscript', +]) + if env['x11']: SConscript([ 'state_trackers/glx/xlib/SConscript', diff --git a/src/gallium/state_trackers/osmesa/SConscript b/src/gallium/state_trackers/osmesa/SConscript new file mode 100644 index 000..fa7c968 --- /dev/null +++ b/src/gallium/state_trackers/osmesa/SConscript @@ -0,0 +1,25 @@ +import os + +Import('*') + +env = env.Clone() + +env.Append(CPPPATH = [ +'#src/mapi', +'#src/mesa', +'.', +]) + +env.AppendUnique(CPPDEFINES = [ +'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers +'WIN32_LEAN_AND_MEAN', # 
https://urldefense.proofpoint.com/v2/url?u=http-3A__msdn2.microsoft.com_en-2Dus_library_6dwk3a1z.aspx&d=AwIBaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=zfmBZnnVGHeYde45pMKNnVyzeaZbdIqVLprmZCM2zzE&m=V7sOetAjivzNtMiJzzOh63AXslqGWPwHWPoxHrHKbGs&s=2ddtnvnyotNzbqM7WTXS_y4myuI1d-lxwzZA9RPX34o&e= +]) +if not env['gles']: +# prevent _glapi_* from being declared __declspec(dllimport) +env.Append(CPPDEFINES = ['_GLAPI_NO_EXPORTS']) + Shouldn't these be used when building for windows only ? +st_osmesa = env.ConvenienceLibrary( +target ='st_osmesa', +source = env.ParseSourceList('Makefile.sources', 'C_SOURCES'), +) +Export('st_osmesa') diff --git a/src/gallium/state_trackers/osmesa/osmesa.def b/src/gallium/state_trackers/osmesa/osmesa.def new file mode 100644 index 000..e2a31ab --- /dev/null +++ b/src/gallium/state_trackers/osmesa/osmesa.def Can we move this file next to it's only user - i.e. into targets/osmesa/ ? @@ -0,0 +1,16 @@ +;DESCRIPTION 'Mesa OSMesa lib for Win32' +VERSION 4.1 + +EXPORTS + OSMesaCreateContext + OSMesaCreateContextExt + OSMesaDestroyContext + OSMesaMakeCurrent + OSMesaGetCurrentContext + OSMesaPixelStore + OSMesaGetIntegerv + OSMesaGetDepthBuffer + OSMesaGetColorBuffer + OSMesaGetProcAddress + OSMesaColorClamp + OSMesaPostprocess diff --git a/src/gallium/targets/osmesa/SConscript b/src/gallium/targets/osmesa/SConscript new file mode 100644 index 000..2c936cf --- /dev/null +++ b/src/gallium/targets/osmesa/SConscript @@ -0,0 +1,41 @@ +Import('*') + +env = env.Clone() + +env.Prepend(CPPPATH = [ +'#src/mapi', +'#src/mesa', +#Dir('../../../mapi'), # src/mapi build path for python-generated GL API files/headers +]) + +sources = [ +'target.c', +] +sources += ['#src/gallium/state_trackers/osmesa/osmesa.def'] + Afaict this should be included only if the target is Windows. 
+drivers = [] + +if env['llvm']: +env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE') +env.Append(CPPDEFINES = 'GALLIUM_TRACE') +drivers += [llvmpipe] +else: +env.Append(CPPDEFINES = 'GALLIUM_SOFTPIPE') +env.Append(CPPDEFINES = 'GALLIUM_TRACE') +drivers += [softpipe] + One should include softpipe unconditionally as we can switch between llvmpipe and softpipe at runtime. +if env['platform'] == 'windows': +env.AppendUnique(CPPDEFINES = [ +'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers +]) +if not env['gles']: +# prevent _glapi_* from being declared __declspec(dllimport) +env.Append(CPPDEFINES = ['_GLAPI_NO_EXPORTS']) + Don't think you need this if block. +gallium_osmesa = env.SharedLibrary( +target ='osmesa', +source = sources, +LIBS = drivers + st_osmesa + ws_null + glapi + mesa + gallium + trace + glsl + mesautil + env['LIBS'], How about we move this before the SharedLibrary construct and use env.Prepend(LIBS =... like other places in mesa ? Thanks Emil _
[Mesa-dev] [Bug 89823] [swrast] driver loads but complains then fails to work in Piglit which shows GLSL message
https://bugs.freedesktop.org/show_bug.cgi?id=89823 Dan Sebald changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |INVALID -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/12] i965: Create NIR during LinkShader() and ProgramStringNotify().
On 04/08/2015 01:46 PM, Jason Ekstrand wrote: > On Wed, Apr 8, 2015 at 12:53 PM, Ian Romanick wrote: >> On 04/08/2015 02:25 AM, Martin Peres wrote: >>> On 08/04/15 10:06, Kenneth Graunke wrote: Previously, we translated into NIR and did all the optimizations and lowering as part of running fs_visitor. This meant that we did all of that work twice for fragment shaders - once for SIMD8, and again for SIMD16. We also had to redo it every time we hit a state based recompile. We now generate NIR once at link time. ARB programs don't have linking, so we instead generate it at ProgramStringNotify time. Mesa's fixed function vertex program handling doesn't bother to inform the driver about new programs at all (which is rather mean), so we generate NIR at the last minute, if it hasn't happened already. shader-db runs ~9.4% faster on my i7-5600U, with a release build. >>> >>> Nice speed improvement but wouldn't it affect negatively programs using >>> SSO to recombine shaders at run time? >> >> Hm... that's a fair question. Does NIR do any cross-stage optimization? > > Not at the moment. We probably should since NIR can probably > dead-code things better. Okay. Then this shouldn't impact SSO. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 89963] lp_bld_debug.cpp:100:31: error: no matching function for call to ‘llvm::raw_ostream::raw_ostream()’
https://bugs.freedesktop.org/show_bug.cgi?id=89963 Bug ID: 89963 Summary: lp_bld_debug.cpp:100:31: error: no matching function for call to ‘llvm::raw_ostream::raw_ostream()’ Product: Mesa Version: git Hardware: x86-64 (AMD64) OS: All Status: NEW Severity: normal Priority: medium Component: Mesa core Assignee: mesa-dev@lists.freedesktop.org Reporter: v...@freedesktop.org QA Contact: mesa-dev@lists.freedesktop.org mesa: a873b79fa5e3138196a3c1785f2a65308fa78286 (master 10.6.0-devel) Build error with llvm-3.7.0svn. Compiling src/gallium/auxiliary/gallivm/lp_bld_debug.cpp ... src/gallium/auxiliary/gallivm/lp_bld_debug.cpp: In constructor ‘raw_debug_ostream::raw_debug_ostream()’: src/gallium/auxiliary/gallivm/lp_bld_debug.cpp:100:31: error: no matching function for call to ‘llvm::raw_ostream::raw_ostream()’ raw_debug_ostream() : pos(0) { } ^ src/gallium/auxiliary/gallivm/lp_bld_debug.cpp:100:31: note: candidate is: In file included from src/gallium/auxiliary/gallivm/lp_bld_debug.cpp:33:0: include/llvm/Support/raw_ostream.h:95:12: note: llvm::raw_ostream::raw_ostream(llvm::raw_ostream::StreamKind, bool) explicit raw_ostream(StreamKind Kind, bool unbuffered = false) ^ include/llvm/Support/raw_ostream.h:95:12: note: candidate expects 2 arguments, 0 provided -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] gallium/ttn: minor cleanup
From: Rob Clark Extract tgsi_dst->Index into a local.. split out from 'gallium/ttn: add support for temp arrays' for noise reduction.. Signed-off-by: Rob Clark --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index da935a4..fcccdad 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -340,17 +340,18 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst) { struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register; nir_alu_dest dest; + unsigned index = tgsi_dst->Index; memset(&dest, 0, sizeof(dest)); if (tgsi_dst->File == TGSI_FILE_TEMPORARY) { - dest.dest.reg.reg = c->temp_regs[tgsi_dst->Index].reg; - dest.dest.reg.base_offset = c->temp_regs[tgsi_dst->Index].offset; + dest.dest.reg.reg = c->temp_regs[index].reg; + dest.dest.reg.base_offset = c->temp_regs[index].offset; } else if (tgsi_dst->File == TGSI_FILE_OUTPUT) { - dest.dest.reg.reg = c->output_regs[tgsi_dst->Index].reg; - dest.dest.reg.base_offset = c->output_regs[tgsi_dst->Index].offset; + dest.dest.reg.reg = c->output_regs[index].reg; + dest.dest.reg.base_offset = c->output_regs[index].offset; } else if (tgsi_dst->File == TGSI_FILE_ADDRESS) { - assert(tgsi_dst->Index == 0); + assert(index == 0); dest.dest.reg.reg = c->addr_reg; } -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/5] i965/vs: Add src_reg::negative_equals method
From: Ian Romanick This method is similar to the existing ::equals method. Instead of testing that two src_regs are equal to each other, it tests that one is the negation of the other. Signed-off-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_ir_vec4.h | 1 + src/mesa/drivers/dri/i965/brw_vec4.cpp | 43 + 2 files changed, 44 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index d3bd64d..449795a 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -49,6 +49,7 @@ public: src_reg(struct brw_reg reg); bool equals(const src_reg &r) const; + bool negative_equals(const src_reg &r) const; src_reg(class vec4_visitor *v, const struct glsl_type *type); src_reg(class vec4_visitor *v, const struct glsl_type *type, int size); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index ef2fd40..d5286c2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -328,6 +328,49 @@ src_reg::equals(const src_reg &r) const } bool +src_reg::negative_equals(const src_reg &r) const +{ + if (file != r.file) + return false; + + if (file == IMM) { + if (!(reg == r.reg && +reg_offset == r.reg_offset && +type == r.type && +negate == r.negate && +abs == r.abs && +swizzle == r.swizzle && +!reladdr && !r.reladdr)) + return false; + + switch (fixed_hw_reg.type) { + case BRW_REGISTER_TYPE_F: + return memcmp(&fixed_hw_reg, &r.fixed_hw_reg, + sizeof(fixed_hw_reg) - sizeof(fixed_hw_reg.dw1)) == 0 && +fixed_hw_reg.dw1.f == -r.fixed_hw_reg.dw1.f; + + case BRW_REGISTER_TYPE_D: + return memcmp(&fixed_hw_reg, &r.fixed_hw_reg, + sizeof(fixed_hw_reg) - sizeof(fixed_hw_reg.dw1)) == 0 && +fixed_hw_reg.dw1.d == -r.fixed_hw_reg.dw1.d; + + default: + return false; + } + } else { + return reg == r.reg && + reg_offset == r.reg_offset && + type == r.type && + negate != r.negate && + abs == r.abs && + swizzle == r.swizzle && + 
!reladdr && !r.reladdr && + memcmp(&fixed_hw_reg, &r.fixed_hw_reg, +sizeof(fixed_hw_reg)) == 0; + } +} + +bool vec4_visitor::opt_vector_float() { bool progress = false; -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/5] i965: Emit MUL with a negated src for neg(mul(...)).
From: Matt Turner Shader-db results: GM45: total instructions in shared programs: 4060151 -> 4059575 (-0.01%) instructions in affected programs: 81478 -> 80902 (-0.71%) helped:441 HURT: 4 GM45 NIR: total instructions in shared programs: 4079065 -> 4078671 (-0.01%) instructions in affected programs: 67196 -> 66802 (-0.59%) helped:398 HURT: 4 Iron Lake: total instructions in shared programs: 5477330 -> 5476586 (-0.01%) instructions in affected programs: 92444 -> 91700 (-0.80%) helped:472 HURT: 4 Iron Lake NIR: total instructions in shared programs: 5675880 -> 5675486 (-0.01%) instructions in affected programs: 67196 -> 66802 (-0.59%) helped:398 HURT: 4 Sandy Bridge: total instructions in shared programs: 7307710 -> 7305083 (-0.04%) instructions in affected programs: 282903 -> 280276 (-0.93%) helped:1506 Sandy Bridge NIR: total instructions in shared programs: 7329198 -> 7328404 (-0.01%) instructions in affected programs: 133777 -> 132983 (-0.59%) helped:682 Ivy Bridge: total instructions in shared programs: 6763747 -> 6763036 (-0.01%) instructions in affected programs: 79009 -> 78298 (-0.90%) helped:573 Ivy Bridge NIR: total instructions in shared programs: 6765185 -> 6764738 (-0.01%) instructions in affected programs: 61743 -> 61296 (-0.72%) helped:445 Haswell: total instructions in shared programs: 6223429 -> 6222718 (-0.01%) instructions in affected programs: 77427 -> 76716 (-0.92%) helped:573 Haswell NIR: total instructions in shared programs: 6180970 -> 6180523 (-0.01%) instructions in affected programs: 61743 -> 61296 (-0.72%) helped:445 Broadwell: total instructions in shared programs: 7284540 -> 7284103 (-0.01%) instructions in affected programs: 31526 -> 31089 (-1.39%) helped:251 Broadwell NIR: total instructions in shared programs: 7500487 -> 7500487 (0.00%) instructions in affected programs: 0 -> 0 Reviewed-by: Ben Widawsky Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 15 +++ src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 
20 2 files changed, 35 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 3622e65..574c266 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -700,6 +700,21 @@ fs_visitor::visit(ir_expression *ir) /* Deal with the real oddball stuff first */ switch (ir->operation) { + case ir_unop_neg: { + if (!ir->type->is_float()) + break; + + ir_expression *mul = ir->operands[0]->as_expression(); + if (mul && mul->operation == ir_binop_mul) { + mul->accept(this); + fs_inst *mul_inst = (fs_inst *) this->instructions.get_tail(); + assert(mul_inst->opcode == BRW_OPCODE_MUL); + + mul_inst->src[1].negate = true; + return; + } + break; + } case ir_binop_add: if (brw->gen <= 5 && try_emit_line(ir)) return; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index ffbe04d..b9d6087 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1303,6 +1303,26 @@ vec4_visitor::visit(ir_expression *ir) src_reg op[ARRAY_SIZE(ir->operands)]; vec4_instruction *inst; + switch (ir->operation) { + case ir_unop_neg: { + if (!ir->type->is_float()) + break; + + ir_expression *mul = ir->operands[0]->as_expression(); + if (mul && mul->operation == ir_binop_mul) { + mul->accept(this); + vec4_instruction *mul_inst = (vec4_instruction *) this->instructions.get_tail(); + assert(mul_inst->opcode == BRW_OPCODE_MUL); + + mul_inst->src[1].negate = true; + return; + } + break; + } + default: + break; + } + if (ir->operation == ir_binop_add) { if (try_emit_mad(ir)) return; -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/5] glsl/cse: Use ir_rvalue_enter_visitor instead of ir_rvalue_visitor
From: Ian Romanick ir_rvalue_visitor visits each rvalue on exit. When visiting a large expression, the leaf expressions will be visited and eliminated first. Once one leaf expression was replaced, it would no longer match a potentially much larger tree. This means that code like: x = a + (b * c); y = -(a + (b * c)); would effectively be replaced by tmp = b * c; x = a + tmp; y = -(a + tmp); As a result both opportunities for generating a MAD would be lost, and we would generate worse code. Using ir_rvalue_enter_visitor means that larger expression trees will be checked first, and we have the potential to eliminate much larger expressions. I believe that opt_cse.cpp predates the existence of ir_rvalue_enter_visitor. Shader-db results: GM45: total instructions in shared programs: 4063165 -> 4061744 (-0.03%) instructions in affected programs: 21664 -> 20243 (-6.56%) helped:259 GM45 NIR: total instructions in shared programs: 4082044 -> 4080646 (-0.03%) instructions in affected programs: 21091 -> 19693 (-6.63%) helped:255 HURT: 1 Iron Lake: total instructions in shared programs: 5480334 -> 5478897 (-0.03%) instructions in affected programs: 25798 -> 24361 (-5.57%) helped:273 HURT: 1 Iron Lake NIR: total instructions in shared programs: 5678776 -> 5677395 (-0.02%) instructions in affected programs: 21744 -> 20363 (-6.35%) helped:263 HURT: 2 Sandy Bridge: total instructions in shared programs: 7318903 -> 7316983 (-0.03%) instructions in affected programs: 37937 -> 36017 (-5.06%) helped:398 HURT: 26 Sandy Bridge NIR: total instructions in shared programs: 7329995 -> 7328069 (-0.03%) instructions in affected programs: 32487 -> 30561 (-5.93%) helped:384 HURT: 6 Ivy Bridge: total instructions in shared programs: 6766579 -> 6765409 (-0.02%) instructions in affected programs: 18110 -> 16940 (-6.46%) helped:288 HURT: 16 GAINED:1 Ivy Bridge NIR: total instructions in shared programs: 6769314 -> 6768159 (-0.02%) instructions in affected programs: 11063 -> 9908 (-10.44%) helped:264 
HURT: 6 Haswell: total instructions in shared programs: 6226294 -> 6225102 (-0.02%) instructions in affected programs: 17555 -> 16363 (-6.79%) helped:297 HURT: 10 GAINED:1 Haswell NIR: total instructions in shared programs: 6183693 -> 6182538 (-0.02%) instructions in affected programs: 10990 -> 9835 (-10.51%) helped:264 HURT: 6 Broadwell: total instructions in shared programs: 7285895 -> 7284537 (-0.02%) instructions in affected programs: 31977 -> 30619 (-4.25%) helped:357 HURT: 6 Broadwell NIR: total instructions in shared programs: 7501711 -> 7501544 (-0.00%) instructions in affected programs: 7174 -> 7007 (-2.33%) helped:87 HURT: 2 Signed-off-by: Ian Romanick --- src/glsl/opt_cse.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/opt_cse.cpp b/src/glsl/opt_cse.cpp index 4b8e9a0..425eebc 100644 --- a/src/glsl/opt_cse.cpp +++ b/src/glsl/opt_cse.cpp @@ -99,7 +99,7 @@ public: ir_variable *var; }; -class cse_visitor : public ir_rvalue_visitor { +class cse_visitor : public ir_rvalue_enter_visitor { public: cse_visitor(exec_list *validate_instructions) : validate_instructions(validate_instructions) -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/5] i965/vs: Allow CSE to handle MULs with negated arguments.
From: Ian Romanick This is similar to commit (47c4b38: i965/fs: Allow CSE to handle MULs with negated arguments.), but it uses a slightly different approach. Shader-db results: GM45: total instructions in shared programs: 4060813 -> 4060151 (-0.02%) instructions in affected programs: 13448 -> 12786 (-4.92%) helped:62 HURT: 9 All other results, except Broadwell, were identical to GM45 w/o NIR. Since NIR isn't used for VEC4, this makes sense. Broadwell: total instructions in shared programs: 7284561 -> 7284540 (-0.00%) instructions in affected programs: 1272 -> 1251 (-1.65%) helped:12 Broadwell NIR: total instructions in shared programs: 7500487 -> 7500487 (0.00%) instructions in affected programs: 0 -> 0 Signed-off-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 32 +- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp index 100e511..49b50a7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp @@ -90,15 +90,34 @@ is_expression(const vec4_instruction *const inst) } static bool -operands_match(const vec4_instruction *a, const vec4_instruction *b) +operands_match(const vec4_instruction *a, const vec4_instruction *b, + bool *negate) { const src_reg *xs = a->src; const src_reg *ys = b->src; + *negate = false; + if (a->opcode == BRW_OPCODE_MAD) { return xs[0].equals(ys[0]) && ((xs[1].equals(ys[1]) && xs[2].equals(ys[2])) || (xs[2].equals(ys[1]) && xs[1].equals(ys[2]))); + } else if (a->opcode == BRW_OPCODE_MUL) { + if ((xs[0].equals(ys[0]) && xs[1].equals(ys[1])) || + (xs[1].equals(ys[0]) && xs[0].equals(ys[1])) || + (xs[0].negative_equals(ys[0]) && xs[1].negative_equals(ys[1])) || + (xs[1].negative_equals(ys[0]) && xs[0].negative_equals(ys[1]))) + return true; + + if ((xs[0].equals(ys[0]) && xs[1].negative_equals(ys[1])) || + (xs[1].equals(ys[0]) && xs[0].negative_equals(ys[1])) || + 
(xs[0].negative_equals(ys[0]) && xs[1].equals(ys[1])) || + (xs[1].negative_equals(ys[0]) && xs[0].equals(ys[1]))) { + *negate = true; + return true; + } + + return false; } else if (!a->is_commutative()) { return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]); } else { @@ -108,7 +127,7 @@ operands_match(const vec4_instruction *a, const vec4_instruction *b) } static bool -instructions_match(vec4_instruction *a, vec4_instruction *b) +instructions_match(vec4_instruction *a, vec4_instruction *b, bool *negate) { return a->opcode == b->opcode && a->saturate == b->saturate && @@ -117,7 +136,7 @@ instructions_match(vec4_instruction *a, vec4_instruction *b) a->dst.writemask == b->dst.writemask && a->force_writemask_all == b->force_writemask_all && a->regs_written == b->regs_written && - operands_match(a, b); + operands_match(a, b, negate); } bool @@ -135,11 +154,12 @@ vec4_visitor::opt_cse_local(bblock_t *block) (inst->dst.file != HW_REG || inst->dst.is_null())) { bool found = false; + bool negate; foreach_in_list_use_after(aeb_entry, entry, &aeb) { /* Match current instruction's expression against those in AEB. */ if (!(entry->generator->dst.is_null() && !inst->dst.is_null()) && -instructions_match(inst, entry->generator)) { +instructions_match(inst, entry->generator, &negate)) { found = true; progress = true; break; @@ -186,6 +206,7 @@ vec4_visitor::opt_cse_local(bblock_t *block) vec4_instruction *copy = MOV(offset(inst->dst, i), offset(entry->tmp, i)); copy->force_writemask_all = inst->force_writemask_all; + copy->src[0].negate = negate; inst->insert_before(block, copy); } } @@ -206,9 +227,10 @@ vec4_visitor::opt_cse_local(bblock_t *block) * the flag register if we just wrote it. 
*/ if (inst->writes_flag()) { +bool negate; /* dummy */ if (entry->generator->reads_flag() || (entry->generator->writes_flag() && - !instructions_match(inst, entry->generator))) { + !instructions_match(inst, entry->generator, &negate))) { entry->remove(); ralloc_free(entry); continue; -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/ma
Re: [Mesa-dev] [PATCH 1/2] gallivm: don't use control flow when doing indirect constant buffer lookups
Am 08.04.2015 um 21:13 schrieb Jose Fonseca: > Series looks good to me. > > Just a few suggestions inline. > > > On 04/04/15 15:50, srol...@vmware.com wrote: >> From: Roland Scheidegger >> >> llvm goes crazy when doing that, using way more memory and time, >> though there's >> probably more to it - this points to a very much similar issue as >> fixed in >> 8a9f5ecdb116d0449d63f7b94efbfa8b205d826f. In any case I've seen a quite >> plain looking vertex shader with just ~50 simple tgsi instructions >> (but with a >> dozen or so such indirect constant buffer lookups) go from a terribly >> high >> ~440ms compile time (consuming 25MB of memory in the process) down to >> a still >> awful ~230ms and 13MB with this fix (with llvm 3.3), so there's still >> obvious >> improvements possible (but I have no clue why it's so slow...). >> The resulting shader is most likely also faster (certainly seemed so >> though >> I don't have any hard numbers as it may have been influenced by >> compile times) >> since generally fetching constants outside the buffer range is most >> likely an >> app error (that is we expect all indices to be valid). >> It is possible this fixes some mysterious vertex shader slowdowns >> we've seen >> ever since we are conforming to newer apis at least partially (the >> main draw >> loop also has similar looking conditionals which we probably could do >> without - >> if not for the fetch at least for the additional elts condition.) 
>> --- >> src/gallium/auxiliary/draw/draw_llvm.h | 2 + >> .../draw/draw_pt_fetch_shade_pipeline_llvm.c | 27 +++--- >> src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c| 95 >> +- >> src/gallium/drivers/llvmpipe/lp_scene.h| 2 + >> src/gallium/drivers/llvmpipe/lp_setup.c| 6 +- >> 5 files changed, 63 insertions(+), 69 deletions(-) >> >> diff --git a/src/gallium/auxiliary/draw/draw_llvm.h >> b/src/gallium/auxiliary/draw/draw_llvm.h >> index 9565fc6..a1983e1 100644 >> --- a/src/gallium/auxiliary/draw/draw_llvm.h >> +++ b/src/gallium/auxiliary/draw/draw_llvm.h >> @@ -472,6 +472,8 @@ struct draw_llvm { >> >> struct draw_gs_llvm_variant_list_item gs_variants_list; >> int nr_gs_variants; >> + >> + float fake_const_buf[4]; > > Couldn't we make fake_const_buf a mere local static const array instead? > It would save memory. Ah right can just declare that in llvm_middle_end_prepare(). Putting it in the scene data seemed convenient for the fs case (there's only one scene per context anyway but I guess could do the same thing there too). 
> >> }; >> >> >> diff --git >> a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c >> b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c >> index 0dfafdc..03257d8 100644 >> --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c >> +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c >> @@ -273,28 +273,35 @@ llvm_middle_end_bind_parameters(struct >> draw_pt_middle_end *middle) >> { >> struct llvm_middle_end *fpme = llvm_middle_end(middle); >> struct draw_context *draw = fpme->draw; >> + struct draw_llvm *llvm = fpme->llvm; >> unsigned i; >> >> - for (i = 0; i < Elements(fpme->llvm->jit_context.vs_constants); >> ++i) { >> + for (i = 0; i < Elements(llvm->jit_context.vs_constants); ++i) { >> int num_consts = >>draw->pt.user.vs_constants_size[i] / (sizeof(float) * 4); >> - fpme->llvm->jit_context.vs_constants[i] = >> draw->pt.user.vs_constants[i]; >> - fpme->llvm->jit_context.num_vs_constants[i] = num_consts; >> + llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i]; >> + llvm->jit_context.num_vs_constants[i] = num_consts; >> + if (num_consts == 0) { >> + llvm->jit_context.vs_constants[i] = llvm->fake_const_buf; >> + } >> } >> - for (i = 0; i < Elements(fpme->llvm->gs_jit_context.constants); >> ++i) { >> + for (i = 0; i < Elements(llvm->gs_jit_context.constants); ++i) { >> int num_consts = >>draw->pt.user.gs_constants_size[i] / (sizeof(float) * 4); >> - fpme->llvm->gs_jit_context.constants[i] = >> draw->pt.user.gs_constants[i]; >> - fpme->llvm->gs_jit_context.num_constants[i] = num_consts; >> + llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i]; >> + llvm->gs_jit_context.num_constants[i] = num_consts; >> + if (num_consts == 0) { >> + llvm->gs_jit_context.constants[i] = llvm->fake_const_buf; >> + } >> } >> >> - fpme->llvm->jit_context.planes = >> + llvm->jit_context.planes = >> (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0]; >> - fpme->llvm->gs_jit_context.planes = >> + 
llvm->gs_jit_context.planes = >> (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0]; >> >> - fpme->llvm->jit_context.viewports = draw->viewports; >> - fpme->llvm->gs_jit_context.viewports = draw->viewports;
[Mesa-dev] [Bug 89960] [softpipe] piglit copy-pixels regression
https://bugs.freedesktop.org/show_bug.cgi?id=89960 Bug ID: 89960 Summary: [softpipe] piglit copy-pixels regression Product: Mesa Version: git Hardware: x86-64 (AMD64) OS: Linux (All) Status: NEW Keywords: bisected, regression Severity: normal Priority: medium Component: Mesa core Assignee: mesa-dev@lists.freedesktop.org Reporter: v...@freedesktop.org QA Contact: mesa-dev@lists.freedesktop.org CC: airl...@freedesktop.org mesa: a873b79fa5e3138196a3c1785f2a65308fa78286 (master 10.6.0-devel) $ ./bin/copy-pixels -auto Probe stencil at (12, 12) Expected: 2 Observed: 0 PIGLIT: {"result": "fail" } 61393bdcdc3b63624bf6e9730444f5e9deeedfc8 is the first bad commit commit 61393bdcdc3b63624bf6e9730444f5e9deeedfc8 Author: Dave Airlie Date: Tue Apr 7 09:52:41 2015 +1000 u_tile: fix stencil texturing tests under softpipe arb_stencil_texturing-draw failed under softpipe because we got a float back from the texturing function, and then tried to U2F it, stencil texturing returns ints, so we should fix the tiling to retrieve the stencil values as integers not floats. Signed-off-by: Dave Airlie :04 04 aad741af761764f93de05cb9a202b41a56c96645 55f28cb0eaf6c88a7f0e52e7baa67d221779a2ce Msrc bisect run success -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/3] gallium/ttn: add support for temp arrays
On Wed, Apr 8, 2015 at 6:34 PM, Rob Clark wrote: > From: Rob Clark > > Since the rest of NIR really would rather have these as variables rather > than registers, create a nir_variable per array. But rather than > completely re-arrange ttn to be variable based rather than register > based, keep the registers. In the cases where there is a matching var > for the reg, ttn_emit_instruction will append the appropriate intrinsic > to get things back from the shadow reg into the variable. hmm, bleh, I probably should have updated the commit msg before hitting send.. this version drops the pre-declared registers for array elements, and instead creates temp regs on demand as Eric suggested (and mentioned in the v3 note below) BR, -R > NOTE: this doesn't quite handle TEMP[ADDR[]] when the DCL doesn't give > an array id. But those just kinda suck, and should really go away. > AFAICT we don't get those from glsl. Might be an issue for some other > state tracker. > > v2: rework to use load_var/store_var with deref chains > v3: create new "burner" reg for temporarily holding the (potentially > writemask'd) dest after each instruction; add load_var to initialize > temporary dest in case not all components are overwritten > > Signed-off-by: Rob Clark > --- > src/gallium/auxiliary/nir/tgsi_to_nir.c | 159 > +--- > 1 file changed, 144 insertions(+), 15 deletions(-) > > diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c > b/src/gallium/auxiliary/nir/tgsi_to_nir.c > index fcccdad..c3332cc 100644 > --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c > +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c > @@ -44,6 +44,7 @@ > struct ttn_reg_info { > /** nir register containing this TGSI index. */ > nir_register *reg; > + nir_variable *var; > /** Offset (in vec4s) from the start of var for this TGSI index. 
*/ > int offset; > }; > @@ -120,21 +121,32 @@ ttn_emit_declaration(struct ttn_compile *c) > unsigned i; > > if (file == TGSI_FILE_TEMPORARY) { > - nir_register *reg; > - if (c->scan->indirect_files & (1 << file)) { > - reg = nir_local_reg_create(b->impl); > - reg->num_components = 4; > - reg->num_array_elems = array_size; > + if (decl->Declaration.Array) { > + /* for arrays, we create variables instead of registers: */ > + nir_variable *var = rzalloc(b->shader, nir_variable); > + > + var->type = glsl_array_type(glsl_vec4_type(), array_size); > + var->data.mode = nir_var_global; > + var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID); > + > + exec_list_push_tail(&b->shader->globals, &var->node); > > for (i = 0; i < array_size; i++) { > -c->temp_regs[decl->Range.First + i].reg = reg; > +/* point all the matching slots to the same var, > + * with appropriate offset set, mostly just so > + * we know what to do when tgsi does a non-indirect > + * access > + */ > +c->temp_regs[decl->Range.First + i].reg = NULL; > +c->temp_regs[decl->Range.First + i].var = var; > c->temp_regs[decl->Range.First + i].offset = i; > } >} else { > for (i = 0; i < array_size; i++) { > -reg = nir_local_reg_create(b->impl); > +nir_register *reg = nir_local_reg_create(b->impl); > reg->num_components = 4; > c->temp_regs[decl->Range.First + i].reg = reg; > +c->temp_regs[decl->Range.First + i].var = NULL; > c->temp_regs[decl->Range.First + i].offset = 0; > } >} > @@ -245,6 +257,32 @@ ttn_emit_immediate(struct ttn_compile *c) > static nir_src * > ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register > *indirect); > > +/* generate either a constant or indirect deref chain for accessing an > + * array variable. 
> + */ > +static nir_deref_var * > +ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset, > +struct tgsi_ind_register *indirect) > +{ > + nir_builder *b = &c->build; > + nir_deref_var *deref = nir_deref_var_create(b->shader, var); > + nir_deref_array *arr = nir_deref_array_create(b->shader); > + > + arr->base_offset = offset; > + arr->deref.type = glsl_get_array_element(var->type); > + > + if (indirect) { > + arr->deref_array_type = nir_deref_array_type_indirect; > + arr->indirect = nir_src_for_reg(c->addr_reg); > + } else { > + arr->deref_array_type = nir_deref_array_type_direct; > + } > + > + deref->deref.child = &arr->deref; > + > + return deref; > +} > + > static nir_src > ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned > index, > struct tgsi_ind_register *indirect) > @@ -256,10 +294,25 @@ ttn_src_for_file_and_index(struct ttn_compile *c, > unsigned file, unsigned index, > > switch (file) { > case TGSI_FILE_TE
Re: [Mesa-dev] [PATCH] i965: Add XRGB8888 format to intel_screen_make_configs
Please consider i915 as well. Should it be added to .../dri/i915/intel_screen.c? diff --git a/src/mesa/drivers/dri/i915/intel_screen.c b/src/mesa/drivers/dri/i915/intel_screen.c index 34efb29..5cd2a9b 100644 --- a/src/mesa/drivers/dri/i915/intel_screen.c +++ b/src/mesa/drivers/dri/i915/intel_screen.c @@ -1061,7 +1076,8 @@ intel_screen_make_configs(__DRIscreen *dri_screen) { static const mesa_format formats[] = { MESA_FORMAT_B5G6R5_UNORM, - MESA_FORMAT_B8G8R8A8_UNORM + MESA_FORMAT_B8G8R8A8_UNORM, + MESA_FORMAT_B8G8R8X8_UNORM, }; /* GLX_SWAP_COPY_OML is not supported due to page flipping. */ 2015-03-25 19:36 GMT+08:00 Boyan Ding : > Some application, such as drm backend of weston, uses XRGB8888 config as > default. i965 doesn't provide this format, but before commit 65c8965d, > the drm platform of EGL takes ARGB8888 as XRGB8888. Now that commit > 65c8965d makes EGL recognize format correctly so weston won't start > because it can't find XRGB8888. Add XRGB8888 format to i965 just as > other drivers do. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89689 > Signed-off-by: Boyan Ding > --- > src/mesa/drivers/dri/i965/intel_screen.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_screen.c > b/src/mesa/drivers/dri/i965/intel_screen.c > index 3640b67..2b82c33 100644 > --- a/src/mesa/drivers/dri/i965/intel_screen.c > +++ b/src/mesa/drivers/dri/i965/intel_screen.c > @@ -1126,7 +1126,8 @@ intel_screen_make_configs(__DRIscreen *dri_screen) > { > static const mesa_format formats[] = { >MESA_FORMAT_B5G6R5_UNORM, > - MESA_FORMAT_B8G8R8A8_UNORM > + MESA_FORMAT_B8G8R8A8_UNORM, > + MESA_FORMAT_B8G8R8X8_UNORM > }; > > /* GLX_SWAP_COPY_OML is not supported due to page flipping. 
*/ > -- > 2.3.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev -- Chih-Wei Android-x86 project http://www.android-x86.org ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 89963] lp_bld_debug.cpp:100:31: error: no matching function for call to ‘llvm::raw_ostream::raw_ostream()’
https://bugs.freedesktop.org/show_bug.cgi?id=89963 Vinson Lee changed: What|Removed |Added Keywords||bisected --- Comment #1 from Vinson Lee --- Build error introduced with llvm-3.7.0svn r234460. commit 271631a0afecebfb806e8d4d67407c919c4e1c0d Author: Rafael Espindola Date: Thu Apr 9 02:10:28 2015 + Add classof implementations to the raw_ostream classes. More uses to follow in a another patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234460 91177308-0d34-0410-b5e6-96231b3b80d8 -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 11/12] i965: Create NIR during LinkShader() and ProgramStringNotify().
On Wed, Apr 8, 2015 at 12:53 PM, Ian Romanick wrote: > On 04/08/2015 02:25 AM, Martin Peres wrote: >> On 08/04/15 10:06, Kenneth Graunke wrote: >>> Previously, we translated into NIR and did all the optimizations and >>> lowering as part of running fs_visitor. This meant that we did all of >>> that work twice for fragment shaders - once for SIMD8, and again for >>> SIMD16. We also had to redo it every time we hit a state based >>> recompile. >>> >>> We now generate NIR once at link time. ARB programs don't have linking, >>> so we instead generate it at ProgramStringNotify time. >>> >>> Mesa's fixed function vertex program handling doesn't bother to inform >>> the driver about new programs at all (which is rather mean), so we >>> generate NIR at the last minute, if it hasn't happened already. >>> >>> shader-db runs ~9.4% faster on my i7-5600U, with a release build. >> >> Nice speed improvement but wouldn't it affect negatively programs using >> SSO to recombine shaders at run time? > > Hm... that's a fair question. Does NIR do any cross-stage optimization? Not at the moment. We probably should since NIR can probably dead-code things better. 
--Jason >>> Signed-off-by: Kenneth Graunke >>> --- >>> src/mesa/drivers/dri/i965/Makefile.sources | 1 + >>> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 174 >>> +-- >>> src/mesa/drivers/dri/i965/brw_nir.c| 213 >>> + >>> src/mesa/drivers/dri/i965/brw_nir.h| 6 + >>> src/mesa/drivers/dri/i965/brw_program.c| 7 + >>> src/mesa/drivers/dri/i965/brw_shader.cpp | 6 + >>> src/mesa/drivers/dri/i965/brw_vec4.cpp | 17 ++- >>> src/mesa/main/mtypes.h | 2 + >>> src/mesa/program/program.c | 5 + >>> 9 files changed, 255 insertions(+), 176 deletions(-) >>> create mode 100644 src/mesa/drivers/dri/i965/brw_nir.c >>> >>> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources >>> b/src/mesa/drivers/dri/i965/Makefile.sources >>> index 498d5a7..6d4659f 100644 >>> --- a/src/mesa/drivers/dri/i965/Makefile.sources >>> +++ b/src/mesa/drivers/dri/i965/Makefile.sources >>> @@ -77,6 +77,7 @@ i965_FILES = \ >>> brw_misc_state.c \ >>> brw_multisample_state.h \ >>> brw_nir.h \ >>> +brw_nir.c \ >>> brw_nir_analyze_boolean_resolves.c \ >>> brw_object_purgeable.c \ >>> brw_packed_float.c \ >>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >>> index 034b79a..ccffd5d 100644 >>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >>> @@ -28,175 +28,10 @@ >>> #include "brw_fs.h" >>> #include "brw_nir.h" >>> -static void >>> -nir_optimize(nir_shader *nir) >>> -{ >>> - bool progress; >>> - do { >>> - progress = false; >>> - nir_lower_vars_to_ssa(nir); >>> - nir_validate_shader(nir); >>> - nir_lower_alu_to_scalar(nir); >>> - nir_validate_shader(nir); >>> - progress |= nir_copy_prop(nir); >>> - nir_validate_shader(nir); >>> - nir_lower_phis_to_scalar(nir); >>> - nir_validate_shader(nir); >>> - progress |= nir_copy_prop(nir); >>> - nir_validate_shader(nir); >>> - progress |= nir_opt_dce(nir); >>> - nir_validate_shader(nir); >>> - progress |= nir_opt_cse(nir); >>> - nir_validate_shader(nir); >>> - progress |= 
nir_opt_peephole_select(nir); >>> - nir_validate_shader(nir); >>> - progress |= nir_opt_algebraic(nir); >>> - nir_validate_shader(nir); >>> - progress |= nir_opt_constant_folding(nir); >>> - nir_validate_shader(nir); >>> - progress |= nir_opt_remove_phis(nir); >>> - nir_validate_shader(nir); >>> - } while (progress); >>> -} >>> - >>> -static bool >>> -count_nir_instrs_in_block(nir_block *block, void *state) >>> -{ >>> - int *count = (int *) state; >>> - nir_foreach_instr(block, instr) { >>> - *count = *count + 1; >>> - } >>> - return true; >>> -} >>> - >>> -static int >>> -count_nir_instrs(nir_shader *nir) >>> -{ >>> - int count = 0; >>> - nir_foreach_overload(nir, overload) { >>> - if (!overload->impl) >>> - continue; >>> - nir_foreach_block(overload->impl, count_nir_instrs_in_block, >>> &count); >>> - } >>> - return count; >>> -} >>> - >>> void >>> fs_visitor::emit_nir_code() >>> { >>> - const nir_shader_compiler_options *options = >>> - ctx->Const.ShaderCompilerOptions[stage].NirOptions; >>> - >>> - nir_shader *nir; >>> - /* First, lower the GLSL IR or Mesa IR to NIR */ >>> - if (shader_prog) { >>> - nir = glsl_to_nir(&shader->base, options); >>> - } else { >>> - nir = prog_to_nir(prog, options); >>> - nir_convert_to_ssa(nir); /* turn registers into SSA */ >>> - } >>> - nir_validate_shader(nir); >>> - >>> - nir_lower_global_vars_to_local(nir); >>> - nir_validate_shader(nir); >>> - >>> - nir_lower_tex_projector(n
[Mesa-dev] [PATCH] gallivm: Fix build since llvm-3.7.0svn r234460.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89963 Signed-off-by: Vinson Lee --- src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 4 1 file changed, 4 insertions(+) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp index 65d2896..b712915 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp @@ -97,7 +97,11 @@ private: uint64_t pos; public: +#if HAVE_LLVM >= 0x0307 + raw_debug_ostream() : raw_ostream(SK_FD), pos(0) { } +#else raw_debug_ostream() : pos(0) { } +#endif void write_impl(const char *Ptr, size_t Size); -- 2.3.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] gallium/ttn: use single component address register
From: Rob Clark Only needs to be a vec1, and this helps out the later opt stages. From the shader (after opt) for fs-temp-array-mat3-index-col-row-wr goes, before: vec1 ssa_408 = imul ssa_155, ssa_1 vec4 ssa_413 = vec4 ssa_408, ssa_412.y, ssa_412.z, ssa_412.w vec4 ssa_166 = intrinsic load_uniform () () (0, 1) vec4 ssa_772 = vec4 ssa_166, ssa_166.y, ssa_166.z, ssa_166.z intrinsic store_var (ssa_772) (arr_5[ssa_413]) () vec4 ssa_416 = vec4 ssa_408, ssa_412.y, ssa_412.z, ssa_412.w vec4 ssa_178 = intrinsic load_uniform () () (1, 1) vec4 ssa_787 = vec4 ssa_178, ssa_178.y, ssa_178.z, ssa_178.z intrinsic store_var (ssa_787) (arr_5[1 + ssa_416]) () vec4 ssa_190 = intrinsic load_uniform () () (2, 1) vec4 ssa_802 = vec4 ssa_190, ssa_190.y, ssa_190.z, ssa_190.z intrinsic store_var (ssa_802) (arr_5[2 + ssa_416]) () after: vec1 ssa_408 = imul ssa_155, ssa_1 vec4 ssa_166 = intrinsic load_uniform () () (0, 1) vec4 ssa_763 = vec4 ssa_166, ssa_166.y, ssa_166.z, ssa_166.z intrinsic store_var (ssa_763) (arr_5[ssa_408]) () vec4 ssa_178 = intrinsic load_uniform () () (1, 1) vec4 ssa_778 = vec4 ssa_178, ssa_178.y, ssa_178.z, ssa_178.z intrinsic store_var (ssa_778) (arr_5[1 + ssa_408]) () vec4 ssa_190 = intrinsic load_uniform () () (2, 1) vec4 ssa_793 = vec4 ssa_190, ssa_190.y, ssa_190.z, ssa_190.z intrinsic store_var (ssa_793) (arr_5[2 + ssa_408]) () ie. it realizes the indirect is the same for all three store_var's which avoids my backend generating duplicate (mov (shl (cov))) instruction chains. 
v2: add assert, and get rid of pointless imov in other indirect paths Signed-off-by: Rob Clark --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 12 +--- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index c3332cc..648ac6f 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -152,7 +152,7 @@ ttn_emit_declaration(struct ttn_compile *c) } } else if (file == TGSI_FILE_ADDRESS) { c->addr_reg = nir_local_reg_create(b->impl); - c->addr_reg->num_components = 4; + c->addr_reg->num_components = 1; } else if (file == TGSI_FILE_SAMPLER) { /* Nothing to record for samplers. */ } else { @@ -350,12 +350,8 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, load->const_index[0] = index; load->const_index[1] = 1; if (indirect) { - nir_alu_src indirect_address; - memset(&indirect_address, 0, sizeof(indirect_address)); - indirect_address.src = nir_src_for_reg(c->addr_reg); - for (int i = 0; i < 4; i++) -indirect_address.swizzle[i] = indirect->Swizzle; - load->src[0] = nir_src_for_ssa(nir_imov_alu(b, indirect_address, 1)); + assert(indirect->Swizzle == TGSI_SWIZZLE_X); + load->src[0] = nir_src_for_reg(c->addr_reg); } nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); @@ -500,6 +496,8 @@ ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc) tgsi_src->Index, (tgsi_src->Indirect ? &tgsi_fsrc->Indirect : NULL)); + if (tgsi_src->File == TGSI_FILE_ADDRESS) + assert(tgsi_src->SwizzleX == TGSI_SWIZZLE_X); } src.swizzle[0] = tgsi_src->SwizzleX; -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] gallium/ttn: add support for temp arrays
From: Rob Clark Since the rest of NIR really would rather have these as variables rather than registers, create a nir_variable per array. But rather than completely re-arrange ttn to be variable based rather than register based, keep the registers. In the cases where there is a matching var for the reg, ttn_emit_instruction will append the appropriate intrinsic to get things back from the shadow reg into the variable. NOTE: this doesn't quite handle TEMP[ADDR[]] when the DCL doesn't give an array id. But those just kinda suck, and should really go away. AFAICT we don't get those from glsl. Might be an issue for some other state tracker. v2: rework to use load_var/store_var with deref chains v3: create new "burner" reg for temporarily holding the (potentially writemask'd) dest after each instruction; add load_var to initialize temporary dest in case not all components are overwritten Signed-off-by: Rob Clark --- src/gallium/auxiliary/nir/tgsi_to_nir.c | 159 +--- 1 file changed, 144 insertions(+), 15 deletions(-) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index fcccdad..c3332cc 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -44,6 +44,7 @@ struct ttn_reg_info { /** nir register containing this TGSI index. */ nir_register *reg; + nir_variable *var; /** Offset (in vec4s) from the start of var for this TGSI index. 
*/ int offset; }; @@ -120,21 +121,32 @@ ttn_emit_declaration(struct ttn_compile *c) unsigned i; if (file == TGSI_FILE_TEMPORARY) { - nir_register *reg; - if (c->scan->indirect_files & (1 << file)) { - reg = nir_local_reg_create(b->impl); - reg->num_components = 4; - reg->num_array_elems = array_size; + if (decl->Declaration.Array) { + /* for arrays, we create variables instead of registers: */ + nir_variable *var = rzalloc(b->shader, nir_variable); + + var->type = glsl_array_type(glsl_vec4_type(), array_size); + var->data.mode = nir_var_global; + var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID); + + exec_list_push_tail(&b->shader->globals, &var->node); for (i = 0; i < array_size; i++) { -c->temp_regs[decl->Range.First + i].reg = reg; +/* point all the matching slots to the same var, + * with appropriate offset set, mostly just so + * we know what to do when tgsi does a non-indirect + * access + */ +c->temp_regs[decl->Range.First + i].reg = NULL; +c->temp_regs[decl->Range.First + i].var = var; c->temp_regs[decl->Range.First + i].offset = i; } } else { for (i = 0; i < array_size; i++) { -reg = nir_local_reg_create(b->impl); +nir_register *reg = nir_local_reg_create(b->impl); reg->num_components = 4; c->temp_regs[decl->Range.First + i].reg = reg; +c->temp_regs[decl->Range.First + i].var = NULL; c->temp_regs[decl->Range.First + i].offset = 0; } } @@ -245,6 +257,32 @@ ttn_emit_immediate(struct ttn_compile *c) static nir_src * ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect); +/* generate either a constant or indirect deref chain for accessing an + * array variable. 
+ */ +static nir_deref_var * +ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset, +struct tgsi_ind_register *indirect) +{ + nir_builder *b = &c->build; + nir_deref_var *deref = nir_deref_var_create(b->shader, var); + nir_deref_array *arr = nir_deref_array_create(b->shader); + + arr->base_offset = offset; + arr->deref.type = glsl_get_array_element(var->type); + + if (indirect) { + arr->deref_array_type = nir_deref_array_type_indirect; + arr->indirect = nir_src_for_reg(c->addr_reg); + } else { + arr->deref_array_type = nir_deref_array_type_direct; + } + + deref->deref.child = &arr->deref; + + return deref; +} + static nir_src ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, struct tgsi_ind_register *indirect) @@ -256,10 +294,25 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index, switch (file) { case TGSI_FILE_TEMPORARY: - src.reg.reg = c->temp_regs[index].reg; - src.reg.base_offset = c->temp_regs[index].offset; - if (indirect) - src.reg.indirect = ttn_src_for_indirect(c, indirect); + if (c->temp_regs[index].var) { + unsigned offset = c->temp_regs[index].offset; + nir_variable *var = c->temp_regs[index].var; + nir_intrinsic_instr *load; + + load = nir_intrinsic_instr_create(b->shader, + nir_intrinsic_load_var); + load-
Re: [Mesa-dev] [PATCH] glsl: check for forced_language_version in is_version()
Reviewed-by: Ian Romanick On 04/07/2015 09:33 AM, Brian Paul wrote: > Ping. > > On 04/01/2015 02:38 PM, Brian Paul wrote: >> This is a follow-on fix from the earlier "glsl: allow ForceGLSLVersion >> to override #version directives" change. Since we're not changing >> the language_version field, we have to check forced_language_version >> here. >> --- >> src/glsl/glsl_parser_extras.h | 4 +++- >> 1 file changed, 3 insertions(+), 1 deletion(-) >> >> diff --git a/src/glsl/glsl_parser_extras.h >> b/src/glsl/glsl_parser_extras.h >> index 1f5478b..dae7864 100644 >> --- a/src/glsl/glsl_parser_extras.h >> +++ b/src/glsl/glsl_parser_extras.h >> @@ -105,8 +105,10 @@ struct _mesa_glsl_parse_state { >> { >> unsigned required_version = this->es_shader ? >>required_glsl_es_version : required_glsl_version; >> + unsigned this_version = this->forced_language_version >> + ? this->forced_language_version : this->language_version; >> return required_version != 0 >> - && this->language_version >= required_version; >> + && this_version >= required_version; >> } >> >> bool check_version(unsigned required_glsl_version, >> > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Problem with LLVM on Windows with MSVC
Hi, I'm trying to build mesa on windows (MSVC) with gles support and with llvm. Here are the keys I'm using: scons.py gles=yes llvm=yes platform=windows libgl-gd I'm getting a bunch of errors like this: LLVMCore.lib(ValueSymbolTable.obj) : error LNK2038: mismatch detected for 'RuntimeLibrary': value 'MDd_DynamicDebug' doesn't match value 'MTd_StaticDebug' in mesa.lib(uniform_query.obj) I understand that this is due to mismatch between the runtime libraries, but I don't know how to fix it. Should I change the CRT in llvm or in mesa? How should I do that? Thanks, Shervin ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/5] Improve CSE
This series is a blending of some work that I have done and some work that Matt did. This series and some patches that didn't pan out (at least not after some other recent changes) are available on the cse-neg branch of my fdo tree. The overall results for the series are: GM45: total instructions in shared programs: 4063165 -> 4059575 (-0.09%) instructions in affected programs: 320182 -> 316592 (-1.12%) helped:1436 HURT: 33 GM45 NIR: total instructions in shared programs: 4082044 -> 4078671 (-0.08%) instructions in affected programs: 291225 -> 287852 (-1.16%) helped:1360 HURT: 57 Iron Lake: total instructions in shared programs: 5480334 -> 5476586 (-0.07%) instructions in affected programs: 400843 -> 397095 (-0.94%) helped:1602 HURT: 60 Iron Lake NIR: total instructions in shared programs: 5678776 -> 5675486 (-0.06%) instructions in affected programs: 337985 -> 334695 (-0.97%) helped:1461 HURT: 101 Sandy Bridge: total instructions in shared programs: 7310035 -> 7305478 (-0.06%) instructions in affected programs: 370635 -> 366078 (-1.23%) helped:2791 HURT: 83 LOST: 8 Sandy Bridge NIR: total instructions in shared programs: 7329995 -> 7324864 (-0.07%) instructions in affected programs: 819282 -> 814151 (-0.63%) helped:3704 HURT: 906 GAINED:4 Ivy Bridge: total instructions in shared programs: 6766579 -> 6762664 (-0.06%) instructions in affected programs: 351828 -> 347913 (-1.11%) helped:2738 HURT: 79 GAINED:1 Ivy Bridge NIR: total instructions in shared programs: 6769314 -> 6763686 (-0.08%) instructions in affected programs: 641188 -> 635560 (-0.88%) helped:3619 HURT: 219 GAINED:2 Haswell: total instructions in shared programs: 6226294 -> 6222448 (-0.06%) instructions in affected programs: 338353 -> 334507 (-1.14%) helped:2734 HURT: 84 GAINED:1 Haswell NIR: total instructions in shared programs: 6183693 -> 6179653 (-0.07%) instructions in affected programs: 454070 -> 450030 (-0.89%) helped:3114 HURT: 215 GAINED:2 Broadwell: total instructions in shared programs: 
7285895 -> 7284103 (-0.02%) instructions in affected programs: 177765 -> 175973 (-1.01%) helped:960 HURT: 101 Broadwell NIR: total instructions in shared programs: 7501711 -> 7499619 (-0.03%) instructions in affected programs: 705285 -> 703193 (-0.30%) helped:2244 HURT: 398 GAINED:2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/5] glsl: Propagate negates through multiplication chains.
From: Matt Turner We propagate negations to the right-most leaves of the multiplication expression trees: - mul(neg(x), neg(y)) -> mul(x, y) - mul(neg(x), y) -> neg(mul(x, y)) - mul(x, neg(y)) -> neg(mul(x, y)) Sandy Bridge w/o NIR and Broadwell w/o NIR are the only platforms hurt by this change. Shader-db results: GM45: total instructions in shared programs: 4061744 -> 4060813 (-0.02%) instructions in affected programs: 294024 -> 293093 (-0.32%) helped:991 HURT: 332 GM45 NIR: total instructions in shared programs: 4080646 -> 4079727 (-0.02%) instructions in affected programs: 269940 -> 269021 (-0.34%) helped:935 HURT: 328 Iron Lake: total instructions in shared programs: 5478897 -> 5477992 (-0.02%) instructions in affected programs: 363125 -> 362220 (-0.25%) helped:1127 HURT: 373 Iron Lake NIR: total instructions in shared programs: 5677395 -> 5676542 (-0.02%) instructions in affected programs: 316047 -> 315194 (-0.27%) helped:1028 HURT: 371 Sandy Bridge: total instructions in shared programs: 7308115 -> 7308267 (0.00%) instructions in affected programs: 520138 -> 520290 (0.03%) helped:2071 HURT: 1316 LOST: 8 Sandy Bridge NIR: total instructions in shared programs: 7328069 -> 7325820 (-0.03%) instructions in affected programs: 836683 -> 834434 (-0.27%) helped:3032 HURT: 1349 GAINED:4 Ivy Bridge: total instructions in shared programs: 6765781 -> 6763909 (-0.03%) instructions in affected programs: 337545 -> 335673 (-0.55%) helped:2183 HURT: 390 Ivy Bridge NIR: total instructions in shared programs: 6768159 -> 6764295 (-0.06%) instructions in affected programs: 621048 -> 617184 (-0.62%) helped:3102 HURT: 426 GAINED:2 Haswell: total instructions in shared programs: 6225372 -> 6223591 (-0.03%) instructions in affected programs: 323481 -> 321700 (-0.55%) helped:2170 HURT: 401 Haswell NIR: total instructions in shared programs: 6182538 -> 6180262 (-0.04%) instructions in affected programs: 434003 -> 431727 (-0.52%) helped:2597 HURT: 422 GAINED:2 Broadwell: total 
instructions in shared programs: 7284537 -> 7284561 (0.00%) instructions in affected programs: 166451 -> 166475 (0.01%) helped:561 HURT: 313 Broadwell NIR: total instructions in shared programs: 7501544 -> 7499619 (-0.03%) instructions in affected programs: 698111 -> 696186 (-0.28%) helped:2157 HURT: 396 GAINED:2 Reviewed-by: Ben Widawsky Reviewed-by: Ian Romanick --- src/glsl/opt_algebraic.cpp | 15 +++ 1 file changed, 15 insertions(+) diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index 3d2f2ca..473eb90 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -549,6 +549,21 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0])); } + if (op_expr[0] && op_expr[0]->operation == ir_unop_neg) { + if (op_expr[1] && op_expr[1]->operation == ir_unop_neg) { +/* mul(neg(x), neg(y)) -> mul(x, y) */ +return mul(op_expr[0]->operands[0], op_expr[1]->operands[0]); + } + + /* mul(neg(x), y) -> neg(mul(x, y)) */ + return neg(mul(op_expr[0]->operands[0], ir->operands[1])); + } + + /* mul(x, neg(y)) -> neg(mul(x, y)) */ + if (op_expr[1] && op_expr[1]->operation == ir_unop_neg) { + return neg(mul(ir->operands[0], op_expr[1]->operands[0])); + } + /* Reassociate multiplication of constants so that we can do * constant folding. */ -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev