date:20150408

These should never happen.  Plus, NIR passes really shouldn't be
reporting linker errors - this is past link time.

Signed-off-by: Kenneth Graunke 
---
 src/glsl/nir/nir_lower_samplers.cpp | 9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/glsl/nir/nir_lower_samplers.cpp 
b/src/glsl/nir/nir_lower_samplers.cpp
index 7a7cf85..90e023a 100644
--- a/src/glsl/nir/nir_lower_samplers.cpp
+++ b/src/glsl/nir/nir_lower_samplers.cpp
@@ -41,17 +41,12 @@ get_sampler_index(struct gl_shader_program *shader_program,
 {
unsigned location;
if (!shader_program->UniformHash->get(location, name)) {
-  linker_error(shader_program,
-   "failed to find sampler named %s.\n", name);
+  assert(!"failed to find sampler");
   return 0;
}
 
if (!shader_program->UniformStorage[location].sampler[stage].active) {
-  assert(0 && "cannot return a sampler");
-  linker_error(shader_program,
-   "cannot return a sampler named %s, because it is not "
-   "used in this shader stage. This is a driver bug.\n",
-   name);
+  assert(!"cannot return a sampler");
   return 0;
}
 
-- 
2.3.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 01/12] nir: Constify prog_to_nir's gl_program pointer.

prog_to_nir should not modify the incoming Mesa IR program - just
translate it.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/program/prog_to_nir.c | 4 ++--
 src/mesa/program/prog_to_nir.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c
index b298d07..c738f50 100644
--- a/src/mesa/program/prog_to_nir.c
+++ b/src/mesa/program/prog_to_nir.c
@@ -43,7 +43,7 @@
  */
 
 struct ptn_compile {
-   struct gl_program *prog;
+   const struct gl_program *prog;
nir_builder build;
bool error;
 
@@ -1052,7 +1052,7 @@ setup_registers_and_variables(struct ptn_compile *c)
 }
 
 struct nir_shader *
-prog_to_nir(struct gl_program *prog, const nir_shader_compiler_options 
*options)
+prog_to_nir(const struct gl_program *prog, const nir_shader_compiler_options 
*options)
 {
struct ptn_compile *c;
struct nir_shader *s;
diff --git a/src/mesa/program/prog_to_nir.h b/src/mesa/program/prog_to_nir.h
index 3c9b664..34e4cd1 100644
--- a/src/mesa/program/prog_to_nir.h
+++ b/src/mesa/program/prog_to_nir.h
@@ -28,7 +28,7 @@
 extern "C" {
 #endif
 
-struct nir_shader *prog_to_nir(struct gl_program *prog,
+struct nir_shader *prog_to_nir(const struct gl_program *prog,
const nir_shader_compiler_options *options);
 
 #ifdef __cplusplus
-- 
2.3.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 12/12] i965/nir: Make INTEL_DEBUG=ann work with NIR.

Now that we store a copy of the NIR shader, and don't immediately free
it, we can use it in annotations as well.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 4 
 src/mesa/drivers/dri/i965/intel_asm_annotation.c | 5 -
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index ccffd5d..b067735 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -438,6 +438,8 @@ fs_visitor::nir_emit_block(nir_block *block)
 void
 fs_visitor::nir_emit_instr(nir_instr *instr)
 {
+   this->base_ir = instr;
+
switch (instr->type) {
case nir_instr_type_alu:
   nir_emit_alu(nir_instr_as_alu(instr));
@@ -464,6 +466,8 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
default:
   unreachable("unknown instruction type");
}
+
+   this->base_ir = NULL;
 }
 
 static brw_reg_type
diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.c 
b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
index ac12655..eed5756 100644
--- a/src/mesa/drivers/dri/i965/intel_asm_annotation.c
+++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
@@ -29,6 +29,7 @@
 #include "program/prog_print.h"
 #include "program/prog_instruction.h"
 #include "main/macros.h"
+#include "glsl/nir/nir.h"
 
 void
 dump_assembly(void *assembly, int num_annotations, struct annotation 
*annotation,
@@ -55,7 +56,9 @@ dump_assembly(void *assembly, int num_annotations, struct 
annotation *annotation
  last_annotation_ir = annotation[i].ir;
  if (last_annotation_ir) {
 fprintf(stderr, "   ");
-if (!prog->Instructions)
+if (prog->nir)
+   nir_print_instr(annotation[i].ir, stderr);
+else if (!prog->Instructions)
fprint_ir(stderr, annotation[i].ir);
 else {
const struct prog_instruction *pi =
-- 
2.3.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 07/12] i965: Change brw_shader to gl_shader in brw_link_shader().

Nothing actually wanted brw_shader fields - we just had to type
shader->base all over the place for no reason.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_shader.cpp | 63 
 1 file changed, 31 insertions(+), 32 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 54d6d71..9fad02c 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -129,15 +129,14 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
   const struct gl_shader_compiler_options *options =
  &ctx->Const.ShaderCompilerOptions[stage];
-  struct brw_shader *shader =
-(struct brw_shader *)shProg->_LinkedShaders[stage];
+  struct gl_shader *shader = shProg->_LinkedShaders[stage];
 
   if (!shader)
 continue;
 
   struct gl_program *prog =
 ctx->Driver.NewProgram(ctx, _mesa_shader_stage_to_program(stage),
-shader->base.Name);
+shader->Name);
   if (!prog)
return false;
   prog->Parameters = _mesa_new_parameter_list();
@@ -147,19 +146,19 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
   /* Temporary memory context for any new IR. */
   void *mem_ctx = ralloc_context(NULL);
 
-  ralloc_adopt(mem_ctx, shader->base.ir);
+  ralloc_adopt(mem_ctx, shader->ir);
 
   bool progress;
 
   /* lower_packing_builtins() inserts arithmetic instructions, so it
* must precede lower_instructions().
*/
-  brw_lower_packing_builtins(brw, (gl_shader_stage) stage, 
shader->base.ir);
-  do_mat_op_to_vec(shader->base.ir);
+  brw_lower_packing_builtins(brw, (gl_shader_stage) stage, shader->ir);
+  do_mat_op_to_vec(shader->ir);
   const int bitfield_insert = brw->gen >= 7
   ? BITFIELD_INSERT_TO_BFM_BFI
   : 0;
-  lower_instructions(shader->base.ir,
+  lower_instructions(shader->ir,
 MOD_TO_FLOOR |
 DIV_TO_MUL_RCP |
 SUB_TO_ADD_NEG |
@@ -172,21 +171,21 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
* if-statements need to be flattened.
*/
   if (brw->gen < 6)
-lower_if_to_cond_assign(shader->base.ir, 16);
+lower_if_to_cond_assign(shader->ir, 16);
 
-  do_lower_texture_projection(shader->base.ir);
-  brw_lower_texture_gradients(brw, shader->base.ir);
-  do_vec_index_to_cond_assign(shader->base.ir);
-  lower_vector_insert(shader->base.ir, true);
+  do_lower_texture_projection(shader->ir);
+  brw_lower_texture_gradients(brw, shader->ir);
+  do_vec_index_to_cond_assign(shader->ir);
+  lower_vector_insert(shader->ir, true);
   if (options->NirOptions == NULL)
- brw_do_cubemap_normalize(shader->base.ir);
-  lower_offset_arrays(shader->base.ir);
-  brw_do_lower_unnormalized_offset(shader->base.ir);
-  lower_noise(shader->base.ir);
-  lower_quadop_vector(shader->base.ir, false);
+ brw_do_cubemap_normalize(shader->ir);
+  lower_offset_arrays(shader->ir);
+  brw_do_lower_unnormalized_offset(shader->ir);
+  lower_noise(shader->ir);
+  lower_quadop_vector(shader->ir, false);
 
   bool lowered_variable_indexing =
- lower_variable_index_to_cond_assign(shader->base.ir,
+ lower_variable_index_to_cond_assign(shader->ir,
  options->EmitNoIndirectInput,
  options->EmitNoIndirectOutput,
  options->EmitNoIndirectTemp,
@@ -197,23 +196,23 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
 "back to very inefficient code generation\n");
   }
 
-  lower_ubo_reference(&shader->base, shader->base.ir);
+  lower_ubo_reference(shader, shader->ir);
 
   do {
 progress = false;
 
 if (is_scalar_shader_stage(brw, stage)) {
-   brw_do_channel_expressions(shader->base.ir);
-   brw_do_vector_splitting(shader->base.ir);
+   brw_do_channel_expressions(shader->ir);
+   brw_do_vector_splitting(shader->ir);
 }
 
-progress = do_lower_jumps(shader->base.ir, true, true,
+progress = do_lower_jumps(shader->ir, true, true,
   true, /* main return */
   false, /* continue */
   false /* loops */
   ) || progress;
 
-progress = do_common_optimization(shader->base.ir, true, true,
+progress = do_common_optimization(shader->ir, tr

[Mesa-dev] [PATCH 06/12] nir: Constify nir_lower_sampler's gl_shader_program pointer.

Now that we're not generating linker errors, we don't actually modify
this.

Signed-off-by: Kenneth Graunke 
---
 src/glsl/nir/nir.h  |  2 +-
 src/glsl/nir/nir_lower_samplers.cpp | 10 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 679911c..e844e4d 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1611,7 +1611,7 @@ void nir_lower_alu_to_scalar(nir_shader *shader);
 void nir_lower_phis_to_scalar(nir_shader *shader);
 
 void nir_lower_samplers(nir_shader *shader,
-struct gl_shader_program *shader_program,
+const struct gl_shader_program *shader_program,
 gl_shader_stage stage);
 
 void nir_lower_system_values(nir_shader *shader);
diff --git a/src/glsl/nir/nir_lower_samplers.cpp 
b/src/glsl/nir/nir_lower_samplers.cpp
index 90e023a..cf8ab83 100644
--- a/src/glsl/nir/nir_lower_samplers.cpp
+++ b/src/glsl/nir/nir_lower_samplers.cpp
@@ -36,7 +36,7 @@ extern "C" {
 }
 
 static unsigned
-get_sampler_index(struct gl_shader_program *shader_program,
+get_sampler_index(const struct gl_shader_program *shader_program,
   gl_shader_stage stage, const char *name)
 {
unsigned location;
@@ -54,7 +54,7 @@ get_sampler_index(struct gl_shader_program *shader_program,
 }
 
 static void
-lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
+lower_sampler(nir_tex_instr *instr, const struct gl_shader_program 
*shader_program,
   gl_shader_stage stage, void *mem_ctx)
 {
if (instr->sampler == NULL)
@@ -133,7 +133,7 @@ lower_sampler(nir_tex_instr *instr, struct 
gl_shader_program *shader_program,
 
 typedef struct {
void *mem_ctx;
-   struct gl_shader_program *shader_program;
+   const struct gl_shader_program *shader_program;
gl_shader_stage stage;
 } lower_state;
 
@@ -154,7 +154,7 @@ lower_block_cb(nir_block *block, void *_state)
 }
 
 static void
-lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program,
+lower_impl(nir_function_impl *impl, const struct gl_shader_program 
*shader_program,
gl_shader_stage stage)
 {
lower_state state;
@@ -167,7 +167,7 @@ lower_impl(nir_function_impl *impl, struct 
gl_shader_program *shader_program,
 }
 
 extern "C" void
-nir_lower_samplers(nir_shader *shader, struct gl_shader_program 
*shader_program,
+nir_lower_samplers(nir_shader *shader, const struct gl_shader_program 
*shader_program,
gl_shader_stage stage)
 {
nir_foreach_overload(shader, overload) {
-- 
2.3.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 10/12] nir: Store num_direct_uniforms in the nir_shader.

Storing this here is pretty sketchy - I don't know if any driver other
than i965 will want to use it.  But this will make it a lot easier to
generate NIR code at link time.  We'll probably rework it anyway.

Signed-off-by: Kenneth Graunke 
---
 src/glsl/nir/nir.h   | 3 +++
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 5 +++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index e844e4d..7d11996 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1429,6 +1429,9 @@ typedef struct nir_shader {
 * access plus one
 */
unsigned num_inputs, num_uniforms, num_outputs;
+
+   /** the number of uniforms that are only accessed directly */
+   unsigned num_direct_uniforms;
 } nir_shader;
 
 #define nir_foreach_overload(shader, overload)\
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 145a447..034b79a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -120,7 +120,7 @@ fs_visitor::emit_nir_code()
 
if (shader_prog) {
   nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms,
-   &num_direct_uniforms,
+   &nir->num_direct_uniforms,
&nir->num_uniforms);
} else {
   /* ARB programs generally create a giant array of "uniform" data, and 
allow
@@ -128,7 +128,7 @@ fs_visitor::emit_nir_code()
* analysis, it's all or nothing.  num_direct_uniforms is only useful 
when
* we have some direct and some indirect access; it doesn't matter here.
*/
-  num_direct_uniforms = 0;
+  nir->num_direct_uniforms = 0;
}
nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs);
nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs);
@@ -343,6 +343,7 @@ void
 fs_visitor::nir_setup_uniforms(nir_shader *shader)
 {
uniforms = shader->num_uniforms;
+   num_direct_uniforms = shader->num_direct_uniforms;
 
/* We split the uniform register file in half.  The first half is
 * entirely direct uniforms.  The second half is indirect.
-- 
2.3.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 08/12] i965: Move brw_link_shader's GLSL IR transformations into a helper.

This function was getting a bit large and unwieldy.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_shader.cpp | 192 ---
 1 file changed, 99 insertions(+), 93 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 9fad02c..bf9aceb 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -120,6 +120,104 @@ brw_lower_packing_builtins(struct brw_context *brw,
lower_packing_builtins(ir, ops);
 }
 
+static void
+process_glsl_ir(struct brw_context *brw,
+struct gl_shader_program *shader_prog,
+struct gl_shader *shader)
+{
+   struct gl_context *ctx = &brw->ctx;
+   const struct gl_shader_compiler_options *options =
+  &ctx->Const.ShaderCompilerOptions[shader->Stage];
+
+   /* Temporary memory context for any new IR. */
+   void *mem_ctx = ralloc_context(NULL);
+
+   ralloc_adopt(mem_ctx, shader->ir);
+
+   /* lower_packing_builtins() inserts arithmetic instructions, so it
+* must precede lower_instructions().
+*/
+   brw_lower_packing_builtins(brw, shader->Stage, shader->ir);
+   do_mat_op_to_vec(shader->ir);
+   const int bitfield_insert = brw->gen >= 7 ? BITFIELD_INSERT_TO_BFM_BFI : 0;
+   lower_instructions(shader->ir,
+  MOD_TO_FLOOR |
+  DIV_TO_MUL_RCP |
+  SUB_TO_ADD_NEG |
+  EXP_TO_EXP2 |
+  LOG_TO_LOG2 |
+  bitfield_insert |
+  LDEXP_TO_ARITH);
+
+   /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
+* if-statements need to be flattened.
+*/
+   if (brw->gen < 6)
+  lower_if_to_cond_assign(shader->ir, 16);
+
+   do_lower_texture_projection(shader->ir);
+   brw_lower_texture_gradients(brw, shader->ir);
+   do_vec_index_to_cond_assign(shader->ir);
+   lower_vector_insert(shader->ir, true);
+   if (options->NirOptions == NULL)
+  brw_do_cubemap_normalize(shader->ir);
+   lower_offset_arrays(shader->ir);
+   brw_do_lower_unnormalized_offset(shader->ir);
+   lower_noise(shader->ir);
+   lower_quadop_vector(shader->ir, false);
+
+   bool lowered_variable_indexing =
+  lower_variable_index_to_cond_assign(shader->ir,
+  options->EmitNoIndirectInput,
+  options->EmitNoIndirectOutput,
+  options->EmitNoIndirectTemp,
+  options->EmitNoIndirectUniform);
+
+   if (unlikely(brw->perf_debug && lowered_variable_indexing)) {
+  perf_debug("Unsupported form of variable indexing in FS; falling "
+ "back to very inefficient code generation\n");
+   }
+
+   lower_ubo_reference(shader, shader->ir);
+
+   bool progress;
+   do {
+  progress = false;
+
+  if (is_scalar_shader_stage(brw, shader->Stage)) {
+ brw_do_channel_expressions(shader->ir);
+ brw_do_vector_splitting(shader->ir);
+  }
+
+  progress = do_lower_jumps(shader->ir, true, true,
+true, /* main return */
+false, /* continue */
+false /* loops */
+) || progress;
+
+  progress = do_common_optimization(shader->ir, true, true,
+options, ctx->Const.NativeIntegers) || 
progress;
+   } while (progress);
+
+   validate_ir_tree(shader->ir);
+
+   /* Now that we've finished altering the linked IR, reparent any live IR back
+* to the permanent memory context, and free the temporary one (discarding 
any
+* junk we optimized away).
+*/
+   reparent_ir(shader->ir, shader->ir);
+   ralloc_free(mem_ctx);
+
+   if (ctx->_Shader->Flags & GLSL_DUMP) {
+  fprintf(stderr, "\n");
+  fprintf(stderr, "GLSL IR for linked %s program %d:\n",
+  _mesa_shader_stage_to_string(shader->Stage),
+  shader_prog->Name);
+  _mesa_print_ir(stderr, shader->ir, NULL);
+  fprintf(stderr, "\n");
+   }
+}
+
 GLboolean
 brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
 {
@@ -127,8 +225,6 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
unsigned int stage;
 
for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
-  const struct gl_shader_compiler_options *options =
- &ctx->Const.ShaderCompilerOptions[stage];
   struct gl_shader *shader = shProg->_LinkedShaders[stage];
 
   if (!shader)
@@ -143,79 +239,7 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
 
   _mesa_copy_linked_program_data((gl_shader_stage) stage, shProg, prog);
 
-  /* Temporary memory context for any new IR. */
-  void *mem_ctx = ralloc_context(NULL);
-
-  ralloc_adopt(mem_ctx, shader->ir);
-
-  b

[Mesa-dev] [PATCH 02/12] nir: Fix #include guards in shader_enums.h.

This header was originally going to be called pipeline.h, but it got
renamed at the last minute.  Make the include guards match.

Signed-off-by: Kenneth Graunke 
---
 src/glsl/shader_enums.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h
index 0e08bd3..7f59fdc 100644
--- a/src/glsl/shader_enums.h
+++ b/src/glsl/shader_enums.h
@@ -23,8 +23,8 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#ifndef PIPELINE_H
-#define PIPELINE_H
+#ifndef SHADER_ENUMS_H
+#define SHADER_ENUMS_H
 
 /**
  * Bitflags for system values.
@@ -167,4 +167,4 @@ enum glsl_interp_qualifier
 };
 
 
-#endif /* PIPELINE_H */
+#endif /* SHADER_ENUMS_H */
-- 
2.3.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 03/12] nir: Move gl_shader_stage enum from mtypes.h to shader_enums.h.

I want to use this in some code that doesn't currently include mtypes.h.
It seems like a better place for it anyway.

Signed-off-by: Kenneth Graunke 
---
 src/glsl/nir/nir.h  |  1 +
 src/glsl/shader_enums.h | 17 +
 src/mesa/main/mtypes.h  | 19 ---
 3 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index f9ca0f7..17a9354 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -34,6 +34,7 @@
 #include "util/set.h"
 #include "util/bitset.h"
 #include "nir_types.h"
+#include "glsl/shader_enums.h"
 #include 
 
 #include "nir_opcodes.h"
diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h
index 7f59fdc..79e0f6b 100644
--- a/src/glsl/shader_enums.h
+++ b/src/glsl/shader_enums.h
@@ -27,6 +27,23 @@
 #define SHADER_ENUMS_H
 
 /**
+ * Shader stages. Note that these will become 5 with tessellation.
+ *
+ * The order must match how shaders are ordered in the pipeline.
+ * The GLSL linker assumes that if ihttp://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 11/12] i965: Create NIR during LinkShader() and ProgramStringNotify().

Previously, we translated into NIR and did all the optimizations and
lowering as part of running fs_visitor.  This meant that we did all of
that work twice for fragment shaders - once for SIMD8, and again for
SIMD16.  We also had to redo it every time we hit a state based
recompile.

We now generate NIR once at link time.  ARB programs don't have linking,
so we instead generate it at ProgramStringNotify time.

Mesa's fixed function vertex program handling doesn't bother to inform
the driver about new programs at all (which is rather mean), so we
generate NIR at the last minute, if it hasn't happened already.

shader-db runs ~9.4% faster on my i7-5600U, with a release build.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/Makefile.sources |   1 +
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 174 +--
 src/mesa/drivers/dri/i965/brw_nir.c| 213 +
 src/mesa/drivers/dri/i965/brw_nir.h|   6 +
 src/mesa/drivers/dri/i965/brw_program.c|   7 +
 src/mesa/drivers/dri/i965/brw_shader.cpp   |   6 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp |  17 ++-
 src/mesa/main/mtypes.h |   2 +
 src/mesa/program/program.c |   5 +
 9 files changed, 255 insertions(+), 176 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_nir.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 498d5a7..6d4659f 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -77,6 +77,7 @@ i965_FILES = \
brw_misc_state.c \
brw_multisample_state.h \
brw_nir.h \
+   brw_nir.c \
brw_nir_analyze_boolean_resolves.c \
brw_object_purgeable.c \
brw_packed_float.c \
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 034b79a..ccffd5d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -28,175 +28,10 @@
 #include "brw_fs.h"
 #include "brw_nir.h"
 
-static void
-nir_optimize(nir_shader *nir)
-{
-   bool progress;
-   do {
-  progress = false;
-  nir_lower_vars_to_ssa(nir);
-  nir_validate_shader(nir);
-  nir_lower_alu_to_scalar(nir);
-  nir_validate_shader(nir);
-  progress |= nir_copy_prop(nir);
-  nir_validate_shader(nir);
-  nir_lower_phis_to_scalar(nir);
-  nir_validate_shader(nir);
-  progress |= nir_copy_prop(nir);
-  nir_validate_shader(nir);
-  progress |= nir_opt_dce(nir);
-  nir_validate_shader(nir);
-  progress |= nir_opt_cse(nir);
-  nir_validate_shader(nir);
-  progress |= nir_opt_peephole_select(nir);
-  nir_validate_shader(nir);
-  progress |= nir_opt_algebraic(nir);
-  nir_validate_shader(nir);
-  progress |= nir_opt_constant_folding(nir);
-  nir_validate_shader(nir);
-  progress |= nir_opt_remove_phis(nir);
-  nir_validate_shader(nir);
-   } while (progress);
-}
-
-static bool
-count_nir_instrs_in_block(nir_block *block, void *state)
-{
-   int *count = (int *) state;
-   nir_foreach_instr(block, instr) {
-  *count = *count + 1;
-   }
-   return true;
-}
-
-static int
-count_nir_instrs(nir_shader *nir)
-{
-   int count = 0;
-   nir_foreach_overload(nir, overload) {
-  if (!overload->impl)
- continue;
-  nir_foreach_block(overload->impl, count_nir_instrs_in_block, &count);
-   }
-   return count;
-}
-
 void
 fs_visitor::emit_nir_code()
 {
-   const nir_shader_compiler_options *options =
-  ctx->Const.ShaderCompilerOptions[stage].NirOptions;
-
-   nir_shader *nir;
-   /* First, lower the GLSL IR or Mesa IR to NIR */
-   if (shader_prog) {
-  nir = glsl_to_nir(&shader->base, options);
-   } else {
-  nir = prog_to_nir(prog, options);
-  nir_convert_to_ssa(nir); /* turn registers into SSA */
-   }
-   nir_validate_shader(nir);
-
-   nir_lower_global_vars_to_local(nir);
-   nir_validate_shader(nir);
-
-   nir_lower_tex_projector(nir);
-   nir_validate_shader(nir);
-
-   nir_normalize_cubemap_coords(nir);
-   nir_validate_shader(nir);
-
-   nir_split_var_copies(nir);
-   nir_validate_shader(nir);
-
-   nir_optimize(nir);
-
-   /* Lower a bunch of stuff */
-   nir_lower_var_copies(nir);
-   nir_validate_shader(nir);
-
-   /* Get rid of split copies */
-   nir_optimize(nir);
-
-   if (shader_prog) {
-  nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms,
-   &nir->num_direct_uniforms,
-   &nir->num_uniforms);
-   } else {
-  /* ARB programs generally create a giant array of "uniform" data, and 
allow
-   * indirect addressing without any boundaries.  In the absence of bounds
-   * analysis, it's all or nothing.  num_direct_uniforms is only useful 
when
-   * we have some direct and some indirect access; it doesn't matter

[Mesa-dev] [PATCH 09/12] i965: Move lower_output_reads to brw_link_shader().

This makes it so emit_nir_code() doesn't modify the GLSL IR.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 1 -
 src/mesa/drivers/dri/i965/brw_shader.cpp | 3 +++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 7c56290..145a447 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -90,7 +90,6 @@ fs_visitor::emit_nir_code()
nir_shader *nir;
/* First, lower the GLSL IR or Mesa IR to NIR */
if (shader_prog) {
-  lower_output_reads(shader->base.ir);
   nir = glsl_to_nir(&shader->base, options);
} else {
   nir = prog_to_nir(prog, options);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index bf9aceb..8700077 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -199,6 +199,9 @@ process_glsl_ir(struct brw_context *brw,
 options, ctx->Const.NativeIntegers) || 
progress;
} while (progress);
 
+   if (options->NirOptions != NULL)
+  lower_output_reads(shader->ir);
+
validate_ir_tree(shader->ir);
 
/* Now that we've finished altering the linked IR, reparent any live IR back
-- 
2.3.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 00/12] i965: Generate NIR at link time

Hello,

This series makes i965 generate NIR at link time (or ProgramStringNotify
time for ARB programs), rather than on each FS/VS compile.  This means
we only do it once, rather than for SIMD8 and again for SIMD16 programs.
It also means we can avoid it when doing state based recompiles.

It speeds up shader-db on my Broadwell by about 9.4%.

It also adds INTEL_DEBUG=ann support, now that we keep a persistent copy
of the NIR program around for the annotations to refer to.

Available in the 'nir-link' branch of ~kwg/mesa.

--Ken

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 04/12] nir: Make nir_lower_samplers take a gl_shader_stage, not a gl_program *.

We don't actually need a gl_program struct.  We only used it to
translate prog->Target (i.e. GL_VERTEX_PROGRAM) to the gl_shader_stage
(i.e. MESA_SHADER_VERTEX).  We may as well just pass that.

Signed-off-by: Kenneth Graunke 
---
 src/glsl/nir/nir.h   |  2 +-
 src/glsl/nir/nir_lower_samplers.cpp  | 26 --
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp |  2 +-
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 17a9354..679911c 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1612,7 +1612,7 @@ void nir_lower_phis_to_scalar(nir_shader *shader);
 
 void nir_lower_samplers(nir_shader *shader,
 struct gl_shader_program *shader_program,
-struct gl_program *prog);
+gl_shader_stage stage);
 
 void nir_lower_system_values(nir_shader *shader);
 void nir_lower_tex_projector(nir_shader *shader);
diff --git a/src/glsl/nir/nir_lower_samplers.cpp 
b/src/glsl/nir/nir_lower_samplers.cpp
index 1e509a9..7a7cf85 100644
--- a/src/glsl/nir/nir_lower_samplers.cpp
+++ b/src/glsl/nir/nir_lower_samplers.cpp
@@ -36,11 +36,9 @@ extern "C" {
 }
 
 static unsigned
-get_sampler_index(struct gl_shader_program *shader_program, const char *name,
-  const struct gl_program *prog)
+get_sampler_index(struct gl_shader_program *shader_program,
+  gl_shader_stage stage, const char *name)
 {
-   GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target);
-
unsigned location;
if (!shader_program->UniformHash->get(location, name)) {
   linker_error(shader_program,
@@ -48,7 +46,7 @@ get_sampler_index(struct gl_shader_program *shader_program, 
const char *name,
   return 0;
}
 
-   if (!shader_program->UniformStorage[location].sampler[shader].active) {
+   if (!shader_program->UniformStorage[location].sampler[stage].active) {
   assert(0 && "cannot return a sampler");
   linker_error(shader_program,
"cannot return a sampler named %s, because it is not "
@@ -57,12 +55,12 @@ get_sampler_index(struct gl_shader_program *shader_program, 
const char *name,
   return 0;
}
 
-   return shader_program->UniformStorage[location].sampler[shader].index;
+   return shader_program->UniformStorage[location].sampler[stage].index;
 }
 
 static void
 lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
-  const struct gl_program *prog, void *mem_ctx)
+  gl_shader_stage stage, void *mem_ctx)
 {
if (instr->sampler == NULL)
   return;
@@ -133,7 +131,7 @@ lower_sampler(nir_tex_instr *instr, struct 
gl_shader_program *shader_program,
   }
}
 
-   instr->sampler_index += get_sampler_index(shader_program, name, prog);
+   instr->sampler_index += get_sampler_index(shader_program, stage, name);
 
instr->sampler = NULL;
 }
@@ -141,7 +139,7 @@ lower_sampler(nir_tex_instr *instr, struct 
gl_shader_program *shader_program,
 typedef struct {
void *mem_ctx;
struct gl_shader_program *shader_program;
-   struct gl_program *prog;
+   gl_shader_stage stage;
 } lower_state;
 
 static bool
@@ -152,7 +150,7 @@ lower_block_cb(nir_block *block, void *_state)
nir_foreach_instr(block, instr) {
   if (instr->type == nir_instr_type_tex) {
  nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
- lower_sampler(tex_instr, state->shader_program, state->prog,
+ lower_sampler(tex_instr, state->shader_program, state->stage,
state->mem_ctx);
   }
}
@@ -162,23 +160,23 @@ lower_block_cb(nir_block *block, void *_state)
 
 static void
 lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program,
-   struct gl_program *prog)
+   gl_shader_stage stage)
 {
lower_state state;
 
state.mem_ctx = ralloc_parent(impl);
state.shader_program = shader_program;
-   state.prog = prog;
+   state.stage = stage;
 
nir_foreach_block(impl, lower_block_cb, &state);
 }
 
 extern "C" void
 nir_lower_samplers(nir_shader *shader, struct gl_shader_program 
*shader_program,
-   struct gl_program *prog)
+   gl_shader_stage stage)
 {
nir_foreach_overload(shader, overload) {
   if (overload->impl)
- lower_impl(overload->impl, shader_program, prog);
+ lower_impl(overload->impl, shader_program, stage);
}
 }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index a874337..7c56290 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -141,7 +141,7 @@ fs_visitor::emit_nir_code()
nir_validate_shader(nir);
 
if (shader_prog) {
-  nir_lower_samplers(nir, shader_prog, shader->base.Program);
+  nir_lower_samplers(nir, shader_prog, stage);
   nir_validate_shader(nir);
}
 
-- 
2.3.5

_

Re: [Mesa-dev] DMA_BUF render targets disabled for intel

2015-04-08 Thread Volker Vogelhuber

On 07.04.2015 21:54, Chad Versace wrote:

On Thu 02 Apr 2015, Axel Davy wrote:

Hi,

you may be interested in looking at this related bug report:
https://bugs.freedesktop.org/show_bug.cgi?id=87452#c5

Yours,

Axel Davy

On 02/04/2015 11:58, Volker Vogelhuber wrote :
We currently want to stream OpenGL output to an FPGA that does not
provide
a SG controller and should manage the transfers from the CPU memory
to its
own hardware. For that reason we want to have the OpenGL driver
(intel baytrail)
to render at a specific memory area within the CPU system. Render to
texture as

is possible e.g. on the PowerVR 530, seems not to be possible, as
GL_TEXTURE_EXTERNAL_OES is not valid for glFrameBufferTexture2D and
in contrast to the PowerVR OpenGL implementation, Mesa seems to
prohibit the
use of GL_TEXTURE_2D for textures created by
glEGLImageTargetTexture2DOES
(there is a check within Mesa where glEGLImageTargetTexture2DOES's
target has
to be equal to the target of the texture => GL_TEXTURE_EXTERNAL_OES
!= GL_TEXTURE_2D).

So the only possible way to render to an EGLImage with memory
allocated by myself
seems to be the use of glEGLImageTargetRenderbufferStorageOES and
bind this

render buffer using glFramebufferRenderbuffer to the FBO.

But for some reason, it seems to be forbidden to use an EGLImage
imported from
a dmabuf as render buffer. At least within
src/mesa/drivers/dri/i965/intel_fbo.c there

is a check:

/* Buffers originating from outside are for read-only. */
if (image->dma_buf_imported) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glEGLImageTargetRenderbufferStorage(dma buffers are
read-only)");

return;
}

This prevents me from doing what I wanted to do and I googled a bit.
I found someone else that just removed that check:

https://github.com/kalyankondapally/Chromium-OzoneGBM/blob/master/0010-i965-remove-read-only-restriction-of-imported-buffer.patch

That patch isn't safe for general renderbuffer usage... details below.
(As an aside, Chrome OS also has a similar patch in their Mesa tree. But
it's safe for Chrome OS, at least for now).

Why is it safe only for ChromeOS? Do you mean it's not safe for X11 or
is there something else I should be aware of? Actually we're not using
X11 ourselves,

but only raw DRM/KMS infrastructure.

and after I did so myself, it just worked as I wanted it to work. I
only wonder why this
limitation has been added. Is it just for some pedantic reasons or
is there any good reason
why EGLImages imported from dmabuf descriptors shouldn't be used for
render targets?

There is a very good reason. It is not pedantic. And me and Tapani
(CC'd) are working on enabling this. See
[https://bugs.freedesktop.org/show_bug.cgi?id=87452#c7] for my
work-in-progress patches.

The reason is that, on Intel chipsets Ivybridge and newer, the i965
driver often expects each color buffer to have an auxiliary metadata
buffer that holds compression information. If the aux buffer does not
exist, i965 will create it. If the metadata buffer and the real color
buffer become unsynchronized (which is *very* likely when using a dma_buf
as renderbuffer storage), you will get corrupt rendering. If you haven't
got corrupt rendering, it's solely due to luck (and that luck is
proportional to the density of cleared pixels that exist in the buffer).

Based on your patches I had a quick look in the source code for creating MCS
buffers, but without knowing details about the intel GPUs I doubt it
makes much

sense for me to dive too deep into it.

Therefore, i965 needs to be taught to disable aux buffers for
dma_buf-backed storage. Before that happens, you risk corrupted images
if you render to a dma_buf-backed renderbuffer.

If you apply Kalyan's patch on top of my (untested) patches, then
that should safely enable what you're doing with the FPGA. (There may
still be bugs with EGLImage orphaning semantics, but that likely won't
affect you).

Thanks again. Are there any forecasts when it will be available upstream?

Regards,
Volker

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/12] i965: Create NIR during LinkShader() and ProgramStringNotify().

2015-04-08 Thread Martin Peres


On 08/04/15 10:06, Kenneth Graunke wrote:

Previously, we translated into NIR and did all the optimizations and
lowering as part of running fs_visitor.  This meant that we did all of
that work twice for fragment shaders - once for SIMD8, and again for
SIMD16.  We also had to redo it every time we hit a state based
recompile.

We now generate NIR once at link time.  ARB programs don't have linking,
so we instead generate it at ProgramStringNotify time.

Mesa's fixed function vertex program handling doesn't bother to inform
the driver about new programs at all (which is rather mean), so we
generate NIR at the last minute, if it hasn't happened already.

shader-db runs ~9.4% faster on my i7-5600U, with a release build.


Nice speed improvement, but wouldn't it negatively affect programs using
SSO to recombine shaders at run time?


Signed-off-by: Kenneth Graunke 
---
  src/mesa/drivers/dri/i965/Makefile.sources |   1 +
  src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 174 +--
  src/mesa/drivers/dri/i965/brw_nir.c| 213 +
  src/mesa/drivers/dri/i965/brw_nir.h|   6 +
  src/mesa/drivers/dri/i965/brw_program.c|   7 +
  src/mesa/drivers/dri/i965/brw_shader.cpp   |   6 +
  src/mesa/drivers/dri/i965/brw_vec4.cpp |  17 ++-
  src/mesa/main/mtypes.h |   2 +
  src/mesa/program/program.c |   5 +
  9 files changed, 255 insertions(+), 176 deletions(-)
  create mode 100644 src/mesa/drivers/dri/i965/brw_nir.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 498d5a7..6d4659f 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -77,6 +77,7 @@ i965_FILES = \
brw_misc_state.c \
brw_multisample_state.h \
brw_nir.h \
+   brw_nir.c \
brw_nir_analyze_boolean_resolves.c \
brw_object_purgeable.c \
brw_packed_float.c \
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 034b79a..ccffd5d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -28,175 +28,10 @@
  #include "brw_fs.h"
  #include "brw_nir.h"
  
-static void

-nir_optimize(nir_shader *nir)
-{
-   bool progress;
-   do {
-  progress = false;
-  nir_lower_vars_to_ssa(nir);
-  nir_validate_shader(nir);
-  nir_lower_alu_to_scalar(nir);
-  nir_validate_shader(nir);
-  progress |= nir_copy_prop(nir);
-  nir_validate_shader(nir);
-  nir_lower_phis_to_scalar(nir);
-  nir_validate_shader(nir);
-  progress |= nir_copy_prop(nir);
-  nir_validate_shader(nir);
-  progress |= nir_opt_dce(nir);
-  nir_validate_shader(nir);
-  progress |= nir_opt_cse(nir);
-  nir_validate_shader(nir);
-  progress |= nir_opt_peephole_select(nir);
-  nir_validate_shader(nir);
-  progress |= nir_opt_algebraic(nir);
-  nir_validate_shader(nir);
-  progress |= nir_opt_constant_folding(nir);
-  nir_validate_shader(nir);
-  progress |= nir_opt_remove_phis(nir);
-  nir_validate_shader(nir);
-   } while (progress);
-}
-
-static bool
-count_nir_instrs_in_block(nir_block *block, void *state)
-{
-   int *count = (int *) state;
-   nir_foreach_instr(block, instr) {
-  *count = *count + 1;
-   }
-   return true;
-}
-
-static int
-count_nir_instrs(nir_shader *nir)
-{
-   int count = 0;
-   nir_foreach_overload(nir, overload) {
-  if (!overload->impl)
- continue;
-  nir_foreach_block(overload->impl, count_nir_instrs_in_block, &count);
-   }
-   return count;
-}
-
  void
  fs_visitor::emit_nir_code()
  {
-   const nir_shader_compiler_options *options =
-  ctx->Const.ShaderCompilerOptions[stage].NirOptions;
-
-   nir_shader *nir;
-   /* First, lower the GLSL IR or Mesa IR to NIR */
-   if (shader_prog) {
-  nir = glsl_to_nir(&shader->base, options);
-   } else {
-  nir = prog_to_nir(prog, options);
-  nir_convert_to_ssa(nir); /* turn registers into SSA */
-   }
-   nir_validate_shader(nir);
-
-   nir_lower_global_vars_to_local(nir);
-   nir_validate_shader(nir);
-
-   nir_lower_tex_projector(nir);
-   nir_validate_shader(nir);
-
-   nir_normalize_cubemap_coords(nir);
-   nir_validate_shader(nir);
-
-   nir_split_var_copies(nir);
-   nir_validate_shader(nir);
-
-   nir_optimize(nir);
-
-   /* Lower a bunch of stuff */
-   nir_lower_var_copies(nir);
-   nir_validate_shader(nir);
-
-   /* Get rid of split copies */
-   nir_optimize(nir);
-
-   if (shader_prog) {
-  nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms,
-   &nir->num_direct_uniforms,
-   &nir->num_uniforms);
-   } else {
-  /* ARB programs generally create a giant array of "uniform" data, and 
allow
-   * indirect addressing without any boundaries.  In th

[Mesa-dev] [PATCH] r600g/sb: Skip empty ALU clause while scheduling

2015-04-08 Thread Glenn Kennard

Fixes assert triggered by
ext_transform_feedback-intervening-read output use_gs
piglit test.

Signed-off-by: Glenn Kennard 
---
 src/gallium/drivers/r600/sb/sb_sched.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/r600/sb/sb_sched.cpp 
b/src/gallium/drivers/r600/sb/sb_sched.cpp
index 4248a3f..2e38a62 100644
--- a/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -825,6 +825,9 @@ void post_scheduler::init_regmap() {
 
 void post_scheduler::process_alu(container_node *c) {
 
+   if (c->empty())
+   return;
+
ucm.clear();
alu.reset();
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] glsl: Allow any sort of sampler array indexing with GLSL ES < 3.00

2015-04-08 Thread Tapani Pälli




On 04/08/2015 01:36 AM, Ian Romanick wrote:

On 04/07/2015 03:22 AM, Francisco Jerez wrote:

Tapani Pälli  writes:


From: Kalyan Kondapally 

Dynamic indexing of sampler arrays is prohibited by GLSL ES 3.00.
Earlier versions allow 'constant-index-expression' indexing, where
index can contain a loop induction variable.

Patch allows dynamic indexing for sampler arrays when GLSL ES < 3.00.
This change makes 'sampler-array-index.frag' parser test in Piglit
pass + fishgl.com works when running Chrome on OpenGL ES 2.0 backend.

v2: small change and some more commit message (Tapani)

Signed-off-by: Kalyan Kondapally 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=84225


Looks good, but did you check what happens now if the shader uses actual
variable indexing (i.e. which lowering cannot turn into a constant) on
an implementation that doesn't support it?  Hopefully no crashes or
hangs?


I think we should add a post-link check that no dynamic indexing remains
after all the optimizations are complete.  The intention of the ES2
language was to allow cases where the dynamic indexing could be
optimized away.  This was redacted in ES3 because each optimizer was
differently capable, so a shader that worked on one driver/GPU might
fail on another... even from the same vendor.

Adding the post-link check should prevent the problems that Curro
(rightly) worried about, and it should still allow the WebGL demo to work.


I was not sure if this is worth the effort since this path has been 
active for desktop GLSL < 1.30 for quite a long time, but I can take a 
look at adding such check.





---
  src/glsl/ast_array_index.cpp | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp
index ecef651..b2609b6 100644
--- a/src/glsl/ast_array_index.cpp
+++ b/src/glsl/ast_array_index.cpp
@@ -226,7 +226,7 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
 * dynamically uniform expression is undefined.
 */
if (array->type->element_type()->is_sampler()) {
-if (!state->is_version(130, 100)) {
+if (!state->is_version(130, 300)) {
if (state->es_shader) {
   _mesa_glsl_warning(&loc, state,
  "sampler arrays indexed with non-constant "


It looks like this is what e3ded7f should have made this code.

Looking at the rest of the surrounding code, I don't think this is quite
right... at the very least, it's not easy to follow.  You can blame me
and Paul for that.  I think this is correct and easier to follow:

if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) {
   if (state->is_version(130, 300))
  _mesa_glsl_error(&loc, state,
 "sampler arrays indexed with non-constant "
 "expressions are forbidden in GLSL %s "
 "and later"
 state->es_shader ? "ES 3.00" : "1.30");
   else if (state->es_shader)
  _mesa_glsl_warning(&loc, state,
 "sampler arrays indexed with non-constant "
 "expressions are optional in %s and will "
 "be forbidden in GLSL ES 3.00 and later"
 state->version_string());
   else
  _mesa_glsl_warning(&loc, state,
 "sampler arrays indexed with non-constant "
 "expressions will be forbidden in GLSL "
 "1.30 and later");
}


OK, thanks!


--
2.1.0


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] st_TexSubImage: unaligned memcpy performance

2015-04-08 Thread Vasilis Liaskovitis

Hi,

(sorry for possible double-posting, I sent this earlier but before
subscribing to mesa-dev list)

I have an issue where st_TexSubImage causes very high CPU load in
__memcpy_sse2_unaligned (Mesa 10.1.3, Xorg 1.15.1, radeon driver, HD 7870).

Any obvious causes / tips for this? e.g. align textures or use different
format/type? I've tried using GL_BGRA/GL_UNSIGNED_BYTE and
GL_BGRA/GL_UNSIGNED_INT_8_8_8_8_REV

__memcpy_sse2_unaligned () at
../sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S:85
85../sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S: No such file or
directory.
(gdb) bt
#0  __memcpy_sse2_unaligned () at
../sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S:85
#1  0x7fffb572f154 in memcpy (__len=7680, __src=,
__dest=0x7fff5835f800) at /usr/include/x86_64-linux-gnu/bits/string3.h:51
#2  st_TexSubImage (ctx=0x1b91420, dims=,
texImage=0x1f81710, xoffset=0, yoffset=0, zoffset=0, width=1920,
height=1080, depth=1, format=32993, type=5121, pixels=0xdacf90,
unpack=0x1bad590)
at ../../../../src/mesa/state_tracker/st_cb_texture.c:752
#3  0x7fffb56c283d in texsubimage (ctx=0x1b91420, dims=dims@entry=2,
target=3553, level=0, xoffset=0, yoffset=0, zoffset=zoffset@entry=0,
width=1920, height=1080, depth=depth@entry=1,
format=format@entry=32993, type=type@entry=5121,
pixels=pixels@entry=0xdacf90)
at ../../../../src/mesa/main/teximage.c:3445
#4  0x7fffb56c659c in _mesa_TexSubImage2D (target=,
level=, xoffset=, yoffset=,
width=, height=,
format=32993, type=5121, pixels=0xdacf90) at
../../../../src/mesa/main/teximage.c:3483
#5  0x7346191a in ?? () from
/opt/build/Qt/5.4/gcc_64/lib/libQt5Gui.so.5
#6  0x7345e6ab in ?? () from
/opt/build/Qt/5.4/gcc_64/lib/libQt5Gui.so.5
#7  0x7345ea32 in QOpenGLTexture::setData(int,
QOpenGLTexture::PixelFormat, QOpenGLTexture::PixelType, void*,
QOpenGLPixelTransferOptions const*) ()
   from /opt/build/Qt/5.4/gcc_64/lib/libQt5Gui.so.5


thanks for any help,

- Vasilis
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] st_TexSubImage: unaligned memcpy performance

2015-04-08 Thread Daniel Stone

Hi,

On 8 April 2015 at 10:57, Vasilis Liaskovitis  wrote:
> I have an issue where st_TexSubImage causes very high CPU load in
> __memcpy_sse2_unaligned (Mesa 10.1.3, Xorg 1.15.1, radeon driver, HD 7870).
>
> Any obvious causes / tips for this? e.g. align textures or use different
> format/type? I 've tried using GL_BGRA/GL_UNSIGNED_BYTE and
> GL_BGRA/GL_UNSIGNED_INT_8_8_8_8_REV
>
> __memcpy_sse2_unaligned () at
> ../sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S:85
> 85../sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S: No such file or
> directory.
> (gdb) bt
> #0  __memcpy_sse2_unaligned () at
> ../sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S:85
> #1  0x7fffb572f154 in memcpy (__len=7680, __src=,
> __dest=0x7fff5835f800) at /usr/include/x86_64-linux-gnu/bits/string3.h:51
> #2  st_TexSubImage (ctx=0x1b91420, dims=, texImage=0x1f81710,
> xoffset=0, yoffset=0, zoffset=0, width=1920, height=1080, depth=1,
> format=32993, type=5121, pixels=0xdacf90, unpack=0x1bad590)
> at ../../../../src/mesa/state_tracker/st_cb_texture.c:752

Your source (0xdacf90) is only aligned to a 16-byte boundary, not 32.
This will cause issues particularly on ARM, where natural alignment is
required (i.e. 32-byte load/stores must be on 32-byte boundaries). By
contrast, the destination is already aligned to a 128-byte boundary.
So fixing the caller, rather than Mesa, should take care of the
problem.

Cheers,
Daniel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] Fix automatic indentation mode for recent emacs, use fewer columns in .git

2015-04-08 Thread Neil Roberts

It seems a bit strange that this has stopped working for you. If you
specify a mode in the .dir-locals.el file then it's supposed to set the
variable for any files with that mode or any modes inherited from that
mode. The C and C++ modes both inherit from prog-mode, as well as a
bunch of other ones such as Python and lisp files. If you are using a
non-standard mode for C files it would be surprising if it doesn't also
inherit from prog-mode. I have just tested this with emacs -q (to
prevent it from loading my personal config) on Emacs 24.3.1 and it does
work as is.

I don't think the patch would break anything for me since you explicitly
set the fill-column back to 70 for commit messages so I don't care
enough to complain if you want to commit it anyway, but it does seem
like something fishy is going on and the reasoning in the commit message
doesn't add up.

Regards,
- Neil

Carl Worth  writes:

> I recently noticed (after upgrading to emacs 24?) that I was no longer
> getting automatic C-style settings in emacs like I was accustomed to
> getting. That is, I was now getting a default indentation of 8 and
> indentation with tabs instead of spaces.
>
> It appears that the .dir-locals.el file is no longer taking
> effect. Presumably, emacs was previously using "prog-mode" for C and
> C++ source files but is now using a mode with some other name?
>
> I didn't chase down the name of the current mode, but just using "nil"
> makes these variables get set on all files, (which should be mostly
> harmless), and should be compatible with both old and new emacs.
>
> I did verify that the later change in this file (to indent with tabs
> when in makefile-mode) still takes precedence as desired.
>
> While editing these files, I've also set things up to use a smaller
> value for fill-column when editing a file within the ".git"
> directory. This will help avoid commit messages getting wrapped when
> "git log" adds some extra indentation.
>
> Note: If this change causes .dir-locals.el to take effect for someone
> when it never had before, then emacs may prompt about the potentially
> "unsafe" eval block here. User can reply to that prompt with "!" to
> permanently whitelist this particular eval block as safe so that
> prompt will not be seen again in the future.
> ---
>  .dir-locals.el| 4 ++--
>  src/gallium/drivers/freedreno/.dir-locals.el  | 2 +-
>  src/gallium/drivers/r600/.dir-locals.el   | 2 +-
>  src/gallium/drivers/radeon/.dir-locals.el | 2 +-
>  src/gallium/drivers/radeonsi/.dir-locals.el   | 2 +-
>  src/gallium/drivers/vc4/.dir-locals.el| 2 +-
>  src/gallium/drivers/vc4/kernel/.dir-locals.el | 2 +-
>  src/gallium/winsys/radeon/.dir-locals.el  | 2 +-
>  src/mesa/drivers/dri/nouveau/.dir-locals.el   | 2 +-
>  9 files changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/.dir-locals.el b/.dir-locals.el
> index d95eb48..f44d964 100644
> --- a/.dir-locals.el
> +++ b/.dir-locals.el
> @@ -1,12 +1,12 @@
> -((prog-mode
> +((nil
>(indent-tabs-mode . nil)
>(tab-width . 8)
>(c-basic-offset . 3)
>(c-file-style . "stroustrup")
> -  (fill-column . 78)
>(eval . (progn
>   (c-set-offset 'innamespace '0)
>   (c-set-offset 'inline-open '0)))
>)
> + (".git" (nil (fill-column . 70)))
>   (makefile-mode (indent-tabs-mode . t))
>   )
> diff --git a/src/gallium/drivers/freedreno/.dir-locals.el 
> b/src/gallium/drivers/freedreno/.dir-locals.el
> index aa20d49..c26578b 100644
> --- a/src/gallium/drivers/freedreno/.dir-locals.el
> +++ b/src/gallium/drivers/freedreno/.dir-locals.el
> @@ -1,4 +1,4 @@
> -((prog-mode
> +((nil
>(indent-tabs-mode . true)
>(tab-width . 4)
>(c-basic-offset . 4)
> diff --git a/src/gallium/drivers/r600/.dir-locals.el 
> b/src/gallium/drivers/r600/.dir-locals.el
> index 4e35c12..8be6a30 100644
> --- a/src/gallium/drivers/r600/.dir-locals.el
> +++ b/src/gallium/drivers/r600/.dir-locals.el
> @@ -1,4 +1,4 @@
> -((prog-mode
> +((nil
>(indent-tabs-mode . true)
>(tab-width . 8)
>(c-basic-offset . 8)
> diff --git a/src/gallium/drivers/radeon/.dir-locals.el 
> b/src/gallium/drivers/radeon/.dir-locals.el
> index 4e35c12..8be6a30 100644
> --- a/src/gallium/drivers/radeon/.dir-locals.el
> +++ b/src/gallium/drivers/radeon/.dir-locals.el
> @@ -1,4 +1,4 @@
> -((prog-mode
> +((nil
>(indent-tabs-mode . true)
>(tab-width . 8)
>(c-basic-offset . 8)
> diff --git a/src/gallium/drivers/radeonsi/.dir-locals.el 
> b/src/gallium/drivers/radeonsi/.dir-locals.el
> index 4e35c12..8be6a30 100644
> --- a/src/gallium/drivers/radeonsi/.dir-locals.el
> +++ b/src/gallium/drivers/radeonsi/.dir-locals.el
> @@ -1,4 +1,4 @@
> -((prog-mode
> +((nil
>(indent-tabs-mode . true)
>(tab-width . 8)
>(c-basic-offset . 8)
> diff --git a/src/gallium/drivers/vc4/.dir-locals.el 
> b/src/gallium/drivers/vc4/.dir-locals.el
> index ac94242..ed10dc2 100644
> --- a/src/gallium/drivers/vc4/.dir-locals.el
> +++ b/src/gallium/dri

[Mesa-dev] [PATCH] gallium/ttn: use single component address register

From: Rob Clark 

Only needs to be a vec1, and this helps out the later opt stages.  From
the shader (after opt) for fs-temp-array-mat3-index-col-row-wr goes,
before:

vec1 ssa_408 = imul ssa_155, ssa_1
vec4 ssa_413 = vec4 ssa_408, ssa_412.y, ssa_412.z, ssa_412.w
vec4 ssa_166 = intrinsic load_uniform () () (0, 1)
vec4 ssa_772 = vec4 ssa_166, ssa_166.y, ssa_166.z, ssa_166.z
intrinsic store_var (ssa_772) (arr_5[ssa_413]) ()
vec4 ssa_416 = vec4 ssa_408, ssa_412.y, ssa_412.z, ssa_412.w
vec4 ssa_178 = intrinsic load_uniform () () (1, 1)
vec4 ssa_787 = vec4 ssa_178, ssa_178.y, ssa_178.z, ssa_178.z
intrinsic store_var (ssa_787) (arr_5[1 + ssa_416]) ()
vec4 ssa_190 = intrinsic load_uniform () () (2, 1)
vec4 ssa_802 = vec4 ssa_190, ssa_190.y, ssa_190.z, ssa_190.z
intrinsic store_var (ssa_802) (arr_5[2 + ssa_416]) ()

after:

vec1 ssa_408 = imul ssa_155, ssa_1
vec4 ssa_166 = intrinsic load_uniform () () (0, 1)
vec4 ssa_763 = vec4 ssa_166, ssa_166.y, ssa_166.z, ssa_166.z
intrinsic store_var (ssa_763) (arr_5[ssa_408]) ()
vec4 ssa_178 = intrinsic load_uniform () () (1, 1)
vec4 ssa_778 = vec4 ssa_178, ssa_178.y, ssa_178.z, ssa_178.z
intrinsic store_var (ssa_778) (arr_5[1 + ssa_408]) ()
vec4 ssa_190 = intrinsic load_uniform () () (2, 1)
vec4 ssa_793 = vec4 ssa_190, ssa_190.y, ssa_190.z, ssa_190.z
intrinsic store_var (ssa_793) (arr_5[2 + ssa_408]) ()

ie. it realizes the indirect is the same for all three store_var's
which avoids my backend generating duplicate (mov (shl (cov)))
instruction chains.

Signed-off-by: Rob Clark 
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c 
b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index f4c0bad..5bd8ca0 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -148,7 +148,7 @@ ttn_emit_declaration(struct ttn_compile *c)
   }
} else if (file == TGSI_FILE_ADDRESS) {
   c->addr_reg = nir_local_reg_create(b->impl);
-  c->addr_reg->num_components = 4;
+  c->addr_reg->num_components = 1;
} else if (file == TGSI_FILE_SAMPLER) {
   /* Nothing to record for samplers. */
} else {
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] DMA_BUF render targets disabled for intel

2015-04-08 Thread Chad Versace

On Wed 08 Apr 2015, Volker Vogelhuber wrote:

On 07.04.2015 21:54, Chad Versace wrote:

On Thu 02 Apr 2015, Axel Davy wrote:

Hi,

you may be interested in looking at this related bug report:
https://bugs.freedesktop.org/show_bug.cgi?id=87452#c5

Yours,

Axel Davy

On 02/04/2015 11:58, Volker Vogelhuber wrote :
We currently want to stream OpenGL output to an FPGA that does
not provide
a SG controller and should manage the transfers from the CPU
memory to its
own hardware. For that reason we want to have the OpenGL driver
(intel baytrail)
to render at a specific memory area within the CPU system.
Render to texture as

it is possible e.g. on the PowerVR 530 seems not to be possible, as
GL_TEXTURE_EXTERNAL_OES is not valid for glFrameBufferTexture2D and
in contrast to the PowerVR OpenGL implementation, Mesa seems to
prohibit the
use of GL_TEXTURE_2D for textures created by
glEGLImageTargetTexture2DOES
(there is a check within Mesa where
glEGLImageTargetTexture2DOES's target has
to be equal to the target of the texture =>
GL_TEXTURE_EXTERNAL_OES != GL_TEXTURE_2D).

So the only possible way to render to an EGLImage with memory
allocated by myself
seems to be the use of glEGLImageTargetRenderbufferStorageOES
and bind this

render buffer using glFramebufferRenderbuffer to the FBO.

But for some reason, it seems to be forbidden to use an EGLImage
imported from
a dmabuf as render buffer. At least within
src/mesa/drivers/dri/i965/intel_fbo.c there

is a check:

/* Buffers originating from outside are for read-only. */
if (image->dma_buf_imported) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glEGLImageTargetRenderbufferStorage(dma buffers are
read-only)");

return;
}

This prevents me from doing what I wanted to do and I googled a bit.
I found someone else that just removed that check:

https://github.com/kalyankondapally/Chromium-OzoneGBM/blob/master/0010-i965-remove-read-only-restriction-of-imported-buffer.patch

That patch isn't safe for general renderbuffer usage... details below.
(As an aside, Chrome OS also has a similar patch in their Mesa tree. But
it's safe for Chrome OS, at least for now).

Why is it safe only for ChromeOS? Do you mean it's not safe for X11 or
is there something else I should be aware of? Actually we're not
using X11 ourselves, but only raw DRM/KMS infrastructure.

Not X11, something else. The probability of render corruption is
proportional to the density of cleared pixels. Specifically, when the
MCS buffer is present, the hardware groups the color buffer into blocks
of pixels. If no pixel in a given block has been rendered since the
previous glClear, then the pixel data in the color buffer is undefined
for that block. The real data for the cleared block lives in the MCS.
So, when a non-GL consumer of the dma_buf reads the color buffer (in
Chrome OS's case, the consumer is Intel's display engine), it will read
undefined pixel data for that block. (If the dma_buf consumer writes
directly to the dma_buf, similar issues arise).

KMS-based Chrome OS renders correctly due to luck: it never calls
glClear on a dma_buf-backed renderbuffer. Chrome always renders to every
pixel in the buffer.

and after I did so myself, it just worked as I wanted it to
work. I only wonder why this
limitation has been added. Is it just for some pedantic reasons
or is there any good reason
why EGLImages imported from dmabuf descriptors shouldn't be used
for render targets?

There is a very good reason. It is not pedantic. Tapani (CC'd) and I
are working on enabling this. See
[https://bugs.freedesktop.org/show_bug.cgi?id=87452#c7] for my
work-in-progress patches.

Based on your patches I had a quick look in the source code for creating MCS
buffers, but without knowing details about the intel GPUs I doubt it
makes much sense for me to dive too deep into it.

Therefore, i965 needs to be taught to disable aux buffers for
dma_buf-backed storage. Before that happens, you risk corrupted images
if you render to a dma_buf-backed renderbuffer.

Thanks again. Are there any forecasts when it will be available upstream?

Soon. I'm going to validate the patches on an Ivybridge Chromebook today
and immediately submit th

Re: [Mesa-dev] [PATCH] gallium/ttn: add support for temp arrays

2015-04-08 Thread Eric Anholt

Rob Clark  writes:

> From: Rob Clark 
>
> Since the rest of NIR really would rather have these as variables rather
> than registers, create a nir_variable per array.  But rather than
> completely re-arrange ttn to be variable based rather than register
> based, keep the registers.  In the cases where there is a matching var
> for the reg, ttn_emit_instruction will append the appropriate intrinsic
> to get things back from the shadow reg into the variable.
>
> NOTE: this doesn't quite handle TEMP[ADDR[]] when the DCL doesn't give
> an array id.  But those just kinda suck, and should really go away.
> AFAICT we don't get those from glsl.  Might be an issue for some other
> state tracker.
>
> v2: rework to use load_var/store_var with deref chains
>
> Signed-off-by: Rob Clark 
> ---
>  src/gallium/auxiliary/nir/tgsi_to_nir.c | 122 
> +++-
>  1 file changed, 103 insertions(+), 19 deletions(-)
>
> diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c 
> b/src/gallium/auxiliary/nir/tgsi_to_nir.c
> index da935a4..f4c0bad 100644
> --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
> +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
> @@ -44,6 +44,7 @@
>  struct ttn_reg_info {
> /** nir register containing this TGSI index. */
> nir_register *reg;
> +   nir_variable *var;
> /** Offset (in vec4s) from the start of var for this TGSI index. */
> int offset;
>  };
> @@ -121,22 +122,29 @@ ttn_emit_declaration(struct ttn_compile *c)
>  
> if (file == TGSI_FILE_TEMPORARY) {
>nir_register *reg;
> -  if (c->scan->indirect_files & (1 << file)) {
> +  nir_variable *var = NULL;
> +
> +  if (decl->Declaration.Array) {
> + /* for arrays, the register created just serves as a
> +  * shadow register.  We append intrinsic_store_global
> +  * after the tgsi instruction is translated to move
> +  * back from the shadow register to the variable
> +  */
> + var = rzalloc(b->shader, nir_variable);
> +
> + var->type = glsl_array_type(glsl_vec4_type(), array_size);
> + var->data.mode = nir_var_global;
> + var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID);
> +
> + exec_list_push_tail(&b->shader->globals, &var->node);
> +  }
> +
> +  for (i = 0; i < array_size; i++) {
>   reg = nir_local_reg_create(b->impl);
>   reg->num_components = 4;
> - reg->num_array_elems = array_size;
> -
> - for (i = 0; i < array_size; i++) {
> -c->temp_regs[decl->Range.First + i].reg = reg;
> -c->temp_regs[decl->Range.First + i].offset = i;
> - }
> -  } else {
> - for (i = 0; i < array_size; i++) {
> -reg = nir_local_reg_create(b->impl);
> -reg->num_components = 4;
> -c->temp_regs[decl->Range.First + i].reg = reg;
> -c->temp_regs[decl->Range.First + i].offset = 0;
> - }
> + c->temp_regs[decl->Range.First + i].reg = reg;
> + c->temp_regs[decl->Range.First + i].var = var;
> + c->temp_regs[decl->Range.First + i].offset = i;

Continuing to use array_size here doesn't make any sense to me, since
you're not handling variable array indices when generating stores into
the array.  So all you want is a single vec4 reg available so that you
have something that our ALU op generation can do writemasked stores
into, and you're picking an arbitrary one of them in ttn_get_dest().

I think this would make a ton more sense if ttn_get_dest() just returned
a new vec4 local reg for the temporary, instead of having this
sort-of-shadow thing.

>}
> } else if (file == TGSI_FILE_ADDRESS) {
>c->addr_reg = nir_local_reg_create(b->impl);
> @@ -245,6 +253,32 @@ ttn_emit_immediate(struct ttn_compile *c)
>  static nir_src *
>  ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register 
> *indirect);
>  
> +/* generate either a constant or indirect deref chain for accessing an
> + * array variable.
> + */
> +static nir_deref_var *
> +ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset,
> +struct tgsi_ind_register *indirect)
> +{
> +   nir_builder *b = &c->build;
> +   nir_deref_var *deref = nir_deref_var_create(b->shader, var);
> +   nir_deref_array *arr = nir_deref_array_create(b->shader);
> +
> +   arr->base_offset = offset;
> +   arr->deref.type = glsl_get_array_element(var->type);
> +
> +   if (indirect) {
> +  arr->deref_array_type = nir_deref_array_type_indirect;
> +  arr->indirect = nir_src_for_reg(c->addr_reg);
> +   } else {
> +  arr->deref_array_type = nir_deref_array_type_direct;
> +   }
> +
> +   deref->deref.child = &arr->deref;
> +
> +   return deref;
> +}
> +
>  static nir_src
>  ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned 
> index,
> struct tgsi_ind_register *indirect)
> @@ -256,10 +290,25 @@ ttn_src_for_file_and_index(struct ttn_com

Re: [Mesa-dev] [PATCH] Fix automatic indentation mode for recent emacs, use fewer columns in .git

2015-04-08 Thread Carl Worth

On Wed, Apr 08 2015, Neil Roberts wrote:
> It seems a bit strange that this has stopped working for you.

Yes. I don't understand exactly what's going on.

> mode. The C and C++ modes both inherit from prog-mode, as well as a
> bunch of other ones such as Python and lisp files.

That's what I guessed, (given that we have "prog-mode" in our files). I
tried investigating a little bit, but didn't get too far.

From an editor session editing an emacs file, (whether my standard
environment or with "emacs -q"), "M-x describe-mode" says:

C/l mode:
Major mode for editing K&R and ANSI C code.

Which looks pretty standard to me. But I don't know what the identifier
for this mode would be to specify it in the .dir-locals.el file nor what
modes it inherits from.

> If you are using a
> non-standard mode for C files it would be surprising if it doesn't also
> inherit from prog-mode. I have just tested this with emacs -q (to
> prevent it from loading my personal config) on Emacs 24.3.1 and it does
> work as is.

No non-standard mode here, (at least not intentionally). And I also
verified the behavior is the same with "emacs -q". Maybe there's a
Debian-specific bug that I'm hitting here?

> I don't think the patch would break anything for me since you explicitly
> set the fill-column back to 70 for commit messages so I don't care
> enough to complain if you want to commit it anyway, but it does seem
> like something fishy is going on and the reasoning in the commit message
> doesn't add up.

I won't disagree there. I don't know the actual root cause, but since
this fixes an actual problem for me, and we haven't identified any
negative side effects, I'll plan to commit this change.

And if anyone can diagnose the root cause and improve .dir-locals.el
further, that will be fine too.

-Carl

pgpD6Q3OM6D8Y.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Value Range Propagation in NIR (GSoC)

On Tue, Apr 7, 2015 at 4:52 PM, Connor Abbott  wrote:
> Hi Thomas,
>
> Thanks for submitting a proposal! Some comments/answers below.
>
> On Tue, Apr 7, 2015 at 3:34 PM, Thomas Helland
>  wrote:
>> Hi,
>>
>> For those that don't know, I've submitted a proposal for this year's GSoC.
>> I've proposed to implement value range propagation and loop unrolling in
>> NIR.
>> Since I'm no expert on compilers I've read up on some literature:
>>
>> I started with "Constant propagation with conditional branches"  (thanks
>> Connor).
>> This paper describes an algorithm, "sparse conditional constant
>> propagation",
>> that seems to be the de facto standard in compilers today.
>>
>> I also found the paper;
>> "Accurate static branch prediction by value range propagation " (VRP).
>> This describes a value range propagation implementation based on SCCP.
>> (This also allows one to set heuristics to calculate educated guesses for
>> the
>> probability of a certain branch, but that's probably more than we're
>> interested in.)
>
> Thanks for mentioning that... I had forgotten the name of that paper.
> You're right in that the branch probability stuff isn't too useful for
> us. Also, it raises an important issue about back-edges from phi
> nodes; they present a more sophisticated method to handle it, but I
> think that for now we can just force back edges to have an infinite
> range unless they're constant.
>
>>
>> There is also a GCC paper (with whatever licensing issues that may apply);
>> "A propagation engine for GCC".
>> They have a shared engine for doing all propagation passes.
>> It handles the worklists, and the logic to traverse these.
>> The implementing passes then supply callbacks to define the lattice rules.
>> They reply back if the instruction was interesting or not,
>> and the propagation engine basically handles the rest.
>>
>> Maybe that's an interesting solution? Or it might not be worth the hassle?
>> We already have copy propagation, and with value range propagation
>> we probably don't want separate constant propagation?
>> (I'm hoping to write the pass so that it handles both constants and value
>> ranges.)
>
> Yes, constant propagation probably won't be so useful once we have value
> range propagation; the former is a special case of the latter. Note
> that we have a nifty way of actually doing the constant folding
> (nir_constant_expressions.py and nir_constant_expressions.h), which
> you should still use if all the inputs are constant.

When I started taking a stab at range propagation, I started by trying
to extend the constant folding framework.  I had a patch
(http://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/nir-minmax)
but it doesn't do nearly as much as I remembered.  I don't know if
it's practical to try and extend it or if we're better off just
hand-rolling whatever we do for range handling.

>> The GCC guys have used this engine to get copy propagation that propagates
>> copies across conditionals, maybe this makes such a solution more
>> interesting?
>
> I'm not so sure how useful such a general framework will be. Constant
> propagation that handles back-edges seems interesting, but I'm not
> sure it's worth the time to implement something this general as a
> first pass.

Agreed.  Let's just get it working first.

>>
>> Connor: I just remembered you saying something about your freedesktop
>> git repo, so I poked around some and found that you have already done
>> some work on VRP based on SCCP? How far did you get?
>
> I started on it, but then I realized that the approach I was using was
> too cumbersome/complicated so I don't think what I have is too useful.
> Feel free to work on it yourself, although Jason and I have discussed
> it so we have some ideas of how to do it. I've written a few notes on
> this below that you may find useful.
>
> - I have a branch I created while working on VRP that you'll probably
> find useful: http://cgit.freedesktop.org/~cwabbott0/mesa/log/?h=nir-worklist
> . The first two commits are already in master, but the last two should
> be useful for implementing SCCP/VRP (although they'll need to be
> rebased, obviously).
>
> - There's a comment in the SCCP paper (5.3, Nodes versus Edges) that
> says: "An alternative way of implementing this would be to add nodes
> to the
> graph and then associate an ExecutableFlag with each node. An
> additional node must be inserted between any node that has more than
> one immediate successor and any successor node that has more than one
> immediate predecessor." I think this procedure is what's usually
> called "splitting critical edges"; in NIR, thanks to the structured
> control flow, there are never any critical edges except for one edge
> case you don't really have to care about too much (namely, an infinite
> loop with one basic block) and therefore you can just use the basic
> block worklist that I added in the branch mentioned above, rather than
> a worklist of basic block edges as the paper describes.
>
> - Th

Re: [Mesa-dev] [PATCH] scons: add target gallium-osmesa

Hi Olivier

Thanks for the patch !

Adding Jose to the Cc list as I believe he'll have some input on the topic.

On 3 April 2015 at 15:06,   wrote:
> From: Olivier Pena 
>
> ---
>  src/gallium/SConscript   |  5 
>  src/gallium/state_trackers/osmesa/SConscript | 25 +
>  src/gallium/state_trackers/osmesa/osmesa.def | 16 +++
>  src/gallium/targets/osmesa/SConscript| 41 
> 
>  4 files changed, 87 insertions(+)
>  create mode 100644 src/gallium/state_trackers/osmesa/SConscript
>  create mode 100644 src/gallium/state_trackers/osmesa/osmesa.def
>  create mode 100644 src/gallium/targets/osmesa/SConscript
>
Can you add the three new files into the EXTRA_DIST variable in the
relevant Makefile.am ? This way we can build scons gallium-osmesa
from a release tarball :-)

> diff --git a/src/gallium/SConscript b/src/gallium/SConscript
> index 680ad92..eeb1c78 100644
> --- a/src/gallium/SConscript
> +++ b/src/gallium/SConscript
> @@ -60,6 +60,11 @@ SConscript([
>  ])
>
>  if not env['embedded']:
> +SConscript([
> +'state_trackers/osmesa/SConscript',
> +'targets/osmesa/SConscript',
> +])
> +
>  if env['x11']:
>  SConscript([
>  'state_trackers/glx/xlib/SConscript',
> diff --git a/src/gallium/state_trackers/osmesa/SConscript 
> b/src/gallium/state_trackers/osmesa/SConscript
> new file mode 100644
> index 000..fa7c968
> --- /dev/null
> +++ b/src/gallium/state_trackers/osmesa/SConscript
> @@ -0,0 +1,25 @@
> +import os
> +
> +Import('*')
> +
> +env = env.Clone()
> +
> +env.Append(CPPPATH = [
> +'#src/mapi',
> +'#src/mesa',
> +'.',
> +])
> +
> +env.AppendUnique(CPPDEFINES = [
> +'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers
> +'WIN32_LEAN_AND_MEAN', # 
> http://msdn2.microsoft.com/en-us/library/6dwk3a1z.aspx
> +])
> +if not env['gles']:
> +# prevent _glapi_* from being declared __declspec(dllimport)
> +env.Append(CPPDEFINES = ['_GLAPI_NO_EXPORTS'])
> +
Shouldn't these be used when building for windows only ?

> +st_osmesa = env.ConvenienceLibrary(
> +target ='st_osmesa',
> +source = env.ParseSourceList('Makefile.sources', 'C_SOURCES'),
> +)
> +Export('st_osmesa')
> diff --git a/src/gallium/state_trackers/osmesa/osmesa.def 
> b/src/gallium/state_trackers/osmesa/osmesa.def
> new file mode 100644
> index 000..e2a31ab
> --- /dev/null
> +++ b/src/gallium/state_trackers/osmesa/osmesa.def
Can we move this file next to its only user - i.e. into targets/osmesa/ ?

> @@ -0,0 +1,16 @@
> +;DESCRIPTION 'Mesa OSMesa lib for Win32'
> +VERSION 4.1
> +
> +EXPORTS
> +   OSMesaCreateContext
> +   OSMesaCreateContextExt
> +   OSMesaDestroyContext
> +   OSMesaMakeCurrent
> +   OSMesaGetCurrentContext
> +   OSMesaPixelStore
> +   OSMesaGetIntegerv
> +   OSMesaGetDepthBuffer
> +   OSMesaGetColorBuffer
> +   OSMesaGetProcAddress
> +   OSMesaColorClamp
> +   OSMesaPostprocess
> diff --git a/src/gallium/targets/osmesa/SConscript 
> b/src/gallium/targets/osmesa/SConscript
> new file mode 100644
> index 000..2c936cf
> --- /dev/null
> +++ b/src/gallium/targets/osmesa/SConscript
> @@ -0,0 +1,41 @@
> +Import('*')
> +
> +env = env.Clone()
> +
> +env.Prepend(CPPPATH = [
> +'#src/mapi',
> +'#src/mesa',
> +#Dir('../../../mapi'), # src/mapi build path for python-generated GL API 
> files/headers
> +])
> +
> +sources = [
> +'target.c',
> +]
> +sources += ['#src/gallium/state_trackers/osmesa/osmesa.def']
> +
Afaict this should be included only if the target is Windows.

> +drivers = []
> +
> +if env['llvm']:
> +env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE')
> +env.Append(CPPDEFINES = 'GALLIUM_TRACE')
> +drivers += [llvmpipe]
> +else:
> +env.Append(CPPDEFINES = 'GALLIUM_SOFTPIPE')
> +env.Append(CPPDEFINES = 'GALLIUM_TRACE')
> +drivers += [softpipe]
> +
One should include softpipe unconditionally as we can switch between
llvmpipe and softpipe at runtime.

> +if env['platform'] == 'windows':
> +env.AppendUnique(CPPDEFINES = [
> +'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers
> +])
> +if not env['gles']:
> +# prevent _glapi_* from being declared __declspec(dllimport)
> +env.Append(CPPDEFINES = ['_GLAPI_NO_EXPORTS'])
> +
Don't think you need this if block.

> +gallium_osmesa = env.SharedLibrary(
> +target ='osmesa',
> +source = sources,
> +LIBS = drivers + st_osmesa + ws_null + glapi + mesa + gallium + 
> trace + glsl + mesautil + env['LIBS'],
How about we move this before the SharedLibrary construct and use
env.Prepend(LIBS =...  like other places in mesa ?

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeonsi: remove bogus r600-- triple

As mentioned by Michel Dänzer for LLVM >= 3.6 we create the
LLVMTargetMachine (with triple amdgcn--), as we setup the radeonsi
context. For older LLVM or hardware (r600) the triple is always r600--
and is created at a later stage - radeon_llvm_compile()

Cc: Michel Dänzer 
Signed-off-by: Emil Velikov 
---
 src/gallium/drivers/radeonsi/si_pipe.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index e761d20..5ea8868 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -85,8 +85,6 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen, void *
LLVMTargetRef r600_target;
 #if HAVE_LLVM >= 0x0306
const char *triple = "amdgcn--";
-#else
-   const char *triple = "r600--";
 #endif
int shader, i;
 
-- 
2.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Mesa-stable] [PATCH] clover: Call clBuildProgram() notification function when build completes

Hi Tom,

Just a friendly reminder that this patch hasn't landed in master yet.
Just making sure it doesn't fall through the cracks :-)

Cheers
Emil

On 24 March 2015 at 19:44, Tom Stellard  wrote:
> Cc: 10.5 10.4 
> ---
>  src/gallium/state_trackers/clover/api/program.cpp | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/src/gallium/state_trackers/clover/api/program.cpp 
> b/src/gallium/state_trackers/clover/api/program.cpp
> index 60184ed..fcec1d7 100644
> --- a/src/gallium/state_trackers/clover/api/program.cpp
> +++ b/src/gallium/state_trackers/clover/api/program.cpp
> @@ -180,8 +180,12 @@ clBuildProgram(cl_program d_prog, cl_uint num_devs,
> validate_build_program_common(prog, num_devs, d_devs, pfn_notify, 
> user_data);
>
> prog.build(devs, opts);
> +   if (pfn_notify)
> +  pfn_notify(d_prog, user_data);
> return CL_SUCCESS;
>  } catch (error &e) {
> +   if (pfn_notify)
> +  pfn_notify(d_prog, user_data);
> if (e.get() == CL_INVALID_COMPILER_OPTIONS)
>return CL_INVALID_BUILD_OPTIONS;
> if (e.get() == CL_COMPILE_PROGRAM_FAILURE)
> --
> 2.0.4
>
> ___
> mesa-stable mailing list
> mesa-sta...@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-stable
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Mesa-stable] [PATCH] i965: Add XRGB8888 format to intel_screen_make_configs

Hi all,

Can we get a pair of eyes on this patch please ?

Boyan
For the future can you please include the CC mesa-stable line in the
commit message. It will make things a bit more obvious as I'm parsing
through the list :-)

Thanks
Emil

On 25 March 2015 at 11:36, Boyan Ding  wrote:
> Some applications, such as the drm backend of weston, use XRGB8888 config as
> default. i965 doesn't provide this format, but before commit 65c8965d,
> the drm platform of EGL takes ARGB8888 as XRGB8888. Now that commit
> 65c8965d makes EGL recognize format correctly so weston won't start
> because it can't find XRGB8888. Add XRGB8888 format to i965 just as
> other drivers do.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89689
> Signed-off-by: Boyan Ding 
> ---
>  src/mesa/drivers/dri/i965/intel_screen.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
> b/src/mesa/drivers/dri/i965/intel_screen.c
> index 3640b67..2b82c33 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -1126,7 +1126,8 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
>  {
> static const mesa_format formats[] = {
>MESA_FORMAT_B5G6R5_UNORM,
> -  MESA_FORMAT_B8G8R8A8_UNORM
> +  MESA_FORMAT_B8G8R8A8_UNORM,
> +  MESA_FORMAT_B8G8R8X8_UNORM
> };
>
> /* GLX_SWAP_COPY_OML is not supported due to page flipping. */
> --
> 2.3.3
>
> ___
> mesa-stable mailing list
> mesa-sta...@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-stable
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/12] i965: Create NIR during LinkShader() and ProgramStringNotify().

On Wed, Apr 8, 2015 at 2:25 AM, Martin Peres
 wrote:
> On 08/04/15 10:06, Kenneth Graunke wrote:
>>
>> Previously, we translated into NIR and did all the optimizations and
>> lowering as part of running fs_visitor.  This meant that we did all of
>> that work twice for fragment shaders - once for SIMD8, and again for
>> SIMD16.  We also had to redo it every time we hit a state based
>> recompile.
>>
>> We now generate NIR once at link time.  ARB programs don't have linking,
>> so we instead generate it at ProgramStringNotify time.
>>
>> Mesa's fixed function vertex program handling doesn't bother to inform
>> the driver about new programs at all (which is rather mean), so we
>> generate NIR at the last minute, if it hasn't happened already.
>>
>> shader-db runs ~9.4% faster on my i7-5600U, with a release build.
>
>
> Nice speed improvement but wouldn't it affect negatively programs using SSO
> to recombine shaders at run time?

No.  I think with SSO we basically just delay linking until they
actually use the shader.  The same linking function gets called either
way.

>>
>> Signed-off-by: Kenneth Graunke 
>> ---
>>   src/mesa/drivers/dri/i965/Makefile.sources |   1 +
>>   src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 174 +--
>>   src/mesa/drivers/dri/i965/brw_nir.c| 213
>> +
>>   src/mesa/drivers/dri/i965/brw_nir.h|   6 +
>>   src/mesa/drivers/dri/i965/brw_program.c|   7 +
>>   src/mesa/drivers/dri/i965/brw_shader.cpp   |   6 +
>>   src/mesa/drivers/dri/i965/brw_vec4.cpp |  17 ++-
>>   src/mesa/main/mtypes.h |   2 +
>>   src/mesa/program/program.c |   5 +
>>   9 files changed, 255 insertions(+), 176 deletions(-)
>>   create mode 100644 src/mesa/drivers/dri/i965/brw_nir.c
>>
>> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources
>> b/src/mesa/drivers/dri/i965/Makefile.sources
>> index 498d5a7..6d4659f 100644
>> --- a/src/mesa/drivers/dri/i965/Makefile.sources
>> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
>> @@ -77,6 +77,7 @@ i965_FILES = \
>> brw_misc_state.c \
>> brw_multisample_state.h \
>> brw_nir.h \
>> +   brw_nir.c \
>> brw_nir_analyze_boolean_resolves.c \
>> brw_object_purgeable.c \
>> brw_packed_float.c \
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index 034b79a..ccffd5d 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -28,175 +28,10 @@
>>   #include "brw_fs.h"
>>   #include "brw_nir.h"
>>   -static void
>> -nir_optimize(nir_shader *nir)
>> -{
>> -   bool progress;
>> -   do {
>> -  progress = false;
>> -  nir_lower_vars_to_ssa(nir);
>> -  nir_validate_shader(nir);
>> -  nir_lower_alu_to_scalar(nir);
>> -  nir_validate_shader(nir);
>> -  progress |= nir_copy_prop(nir);
>> -  nir_validate_shader(nir);
>> -  nir_lower_phis_to_scalar(nir);
>> -  nir_validate_shader(nir);
>> -  progress |= nir_copy_prop(nir);
>> -  nir_validate_shader(nir);
>> -  progress |= nir_opt_dce(nir);
>> -  nir_validate_shader(nir);
>> -  progress |= nir_opt_cse(nir);
>> -  nir_validate_shader(nir);
>> -  progress |= nir_opt_peephole_select(nir);
>> -  nir_validate_shader(nir);
>> -  progress |= nir_opt_algebraic(nir);
>> -  nir_validate_shader(nir);
>> -  progress |= nir_opt_constant_folding(nir);
>> -  nir_validate_shader(nir);
>> -  progress |= nir_opt_remove_phis(nir);
>> -  nir_validate_shader(nir);
>> -   } while (progress);
>> -}
>> -
>> -static bool
>> -count_nir_instrs_in_block(nir_block *block, void *state)
>> -{
>> -   int *count = (int *) state;
>> -   nir_foreach_instr(block, instr) {
>> -  *count = *count + 1;
>> -   }
>> -   return true;
>> -}
>> -
>> -static int
>> -count_nir_instrs(nir_shader *nir)
>> -{
>> -   int count = 0;
>> -   nir_foreach_overload(nir, overload) {
>> -  if (!overload->impl)
>> - continue;
>> -  nir_foreach_block(overload->impl, count_nir_instrs_in_block,
>> &count);
>> -   }
>> -   return count;
>> -}
>> -
>>   void
>>   fs_visitor::emit_nir_code()
>>   {
>> -   const nir_shader_compiler_options *options =
>> -  ctx->Const.ShaderCompilerOptions[stage].NirOptions;
>> -
>> -   nir_shader *nir;
>> -   /* First, lower the GLSL IR or Mesa IR to NIR */
>> -   if (shader_prog) {
>> -  nir = glsl_to_nir(&shader->base, options);
>> -   } else {
>> -  nir = prog_to_nir(prog, options);
>> -  nir_convert_to_ssa(nir); /* turn registers into SSA */
>> -   }
>> -   nir_validate_shader(nir);
>> -
>> -   nir_lower_global_vars_to_local(nir);
>> -   nir_validate_shader(nir);
>> -
>> -   nir_lower_tex_projector(nir);
>> -   nir_validate_shader(nir);
>> -
>> -   nir_normalize_cubemap_coords(nir);
>> -   nir_validate_shader(nir);
>> -
>> -   nir_split_var_copies(nir);
>> -   nir_validate_shader(nir

Re: [Mesa-dev] [PATCH 10/12] nir: Store num_direct_uniforms in the nir_shader.

On Wed, Apr 8, 2015 at 12:06 AM, Kenneth Graunke  wrote:
> Storing this here is pretty sketchy - I don't know if any driver other
> than i965 will want to use it.  But this will make it a lot easier to
> generate NIR code at link time.  We'll probably rework it anyway.

Yeah, it's sketchy but, honestly, storing the number of
uniforms/inputs/outputs is also kinda sketchy.  I can't think of
anything better at the moment so let's just go with it.

> Signed-off-by: Kenneth Graunke 
> ---
>  src/glsl/nir/nir.h   | 3 +++
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 5 +++--
>  2 files changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index e844e4d..7d11996 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -1429,6 +1429,9 @@ typedef struct nir_shader {
>  * access plus one
>  */
> unsigned num_inputs, num_uniforms, num_outputs;
> +
> +   /** the number of uniforms that are only accessed directly */
> +   unsigned num_direct_uniforms;
>  } nir_shader;
>
>  #define nir_foreach_overload(shader, overload)\
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 145a447..034b79a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -120,7 +120,7 @@ fs_visitor::emit_nir_code()
>
> if (shader_prog) {
>nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms,
> -   &num_direct_uniforms,
> +   &nir->num_direct_uniforms,
> &nir->num_uniforms);
> } else {
>/* ARB programs generally create a giant array of "uniform" data, and 
> allow
> @@ -128,7 +128,7 @@ fs_visitor::emit_nir_code()
> * analysis, it's all or nothing.  num_direct_uniforms is only useful 
> when
> * we have some direct and some indirect access; it doesn't matter 
> here.
> */
> -  num_direct_uniforms = 0;
> +  nir->num_direct_uniforms = 0;
> }
> nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs);
> nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs);
> @@ -343,6 +343,7 @@ void
>  fs_visitor::nir_setup_uniforms(nir_shader *shader)
>  {
> uniforms = shader->num_uniforms;
> +   num_direct_uniforms = shader->num_direct_uniforms;
>
> /* We split the uniform register file in half.  The first half is
>  * entirely direct uniforms.  The second half is indirect.
> --
> 2.3.5
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 00/12] i965: Generate NIR at link time

Series is

Reviewed-by: Jason Ekstrand 

On Wed, Apr 8, 2015 at 12:06 AM, Kenneth Graunke  wrote:
> Hello,
>
> This series makes i965 generate NIR at link time (or ProgramStringNotify
> time for ARB programs), rather than on each FS/VS compile.  This means
> we only do it once, rather than for SIMD8 and again for SIMD16 programs.
> It also means we can avoid it when doing state based recompiles.
>
> It speeds up shader-db on my Broadwell by about 9.4%.
>
> It also adds INTEL_DEBUG=ann support, now that we keep a persistent copy
> of the NIR program around for the annotations to refer to.
>
> Available in the 'nir-link' branch of ~kwg/mesa.
>
> --Ken
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] clover: Update the wait_count of the correct event when chaining events

Hi Tom,

Ping for patch#2 for clover. Do let me know if either one is no longer
applicable.

Thanks
Emil

On 25 March 2015 at 17:43, Tom Stellard  wrote:
> Cc: 10.5 10.4 
> ---
>  src/gallium/state_trackers/clover/core/event.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/gallium/state_trackers/clover/core/event.cpp 
> b/src/gallium/state_trackers/clover/core/event.cpp
> index 58de888..9d78b48 100644
> --- a/src/gallium/state_trackers/clover/core/event.cpp
> +++ b/src/gallium/state_trackers/clover/core/event.cpp
> @@ -67,7 +67,7 @@ event::signalled() const {
>  void
>  event::chain(event &ev) {
> if (wait_count) {
> -  ev.wait_count++;
> +  wait_count++;
>_chain.push_back(ev);
> }
> ev.deps.push_back(*this);
> --
> 2.0.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] clover: Update the wait_count of the correct event when chaining events

2015-04-08 Thread Tom Stellard

On Wed, Apr 08, 2015 at 07:41:55PM +0100, Emil Velikov wrote:
> Hi Tom,
> 
> Ping for patch#2 for clover. Do let me know if either one is no longer
> applicable.
> 

This patch was rejected, it's no longer applicable.

-Tom

> Thanks
> Emil
> 
> On 25 March 2015 at 17:43, Tom Stellard  wrote:
> > Cc: 10.5 10.4 
> > ---
> >  src/gallium/state_trackers/clover/core/event.cpp | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/src/gallium/state_trackers/clover/core/event.cpp 
> > b/src/gallium/state_trackers/clover/core/event.cpp
> > index 58de888..9d78b48 100644
> > --- a/src/gallium/state_trackers/clover/core/event.cpp
> > +++ b/src/gallium/state_trackers/clover/core/event.cpp
> > @@ -67,7 +67,7 @@ event::signalled() const {
> >  void
> >  event::chain(event &ev) {
> > if (wait_count) {
> > -  ev.wait_count++;
> > +  wait_count++;
> >_chain.push_back(ev);
> > }
> > ev.deps.push_back(*this);
> > --
> > 2.0.4
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] gallivm: don't use control flow when doing indirect constant buffer lookups

2015-04-08 Thread Jose Fonseca


Series looks good to me.

Just a few suggestions inline.


On 04/04/15 15:50, srol...@vmware.com wrote:

From: Roland Scheidegger 

llvm goes crazy when doing that, using way more memory and time, though there's
probably more to it - this points to a very much similar issue as fixed in
8a9f5ecdb116d0449d63f7b94efbfa8b205d826f. In any case I've seen a quite
plain looking vertex shader with just ~50 simple tgsi instructions (but with a
dozen or so such indirect constant buffer lookups) go from a terribly high
~440ms compile time (consuming 25MB of memory in the process) down to a still
awful ~230ms and 13MB with this fix (with llvm 3.3), so there's still obvious
improvements possible (but I have no clue why it's so slow...).
The resulting shader is most likely also faster (certainly seemed so though
I don't have any hard numbers as it may have been influenced by compile times)
since generally fetching constants outside the buffer range is most likely an
app error (that is we expect all indices to be valid).
It is possible this fixes some mysterious vertex shader slowdowns we've seen
ever since we are conforming to newer apis at least partially (the main draw
loop also has similar looking conditionals which we probably could do without -
if not for the fetch at least for the additional elts condition.)
---
  src/gallium/auxiliary/draw/draw_llvm.h |  2 +
  .../draw/draw_pt_fetch_shade_pipeline_llvm.c   | 27 +++---
  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c| 95 +-
  src/gallium/drivers/llvmpipe/lp_scene.h|  2 +
  src/gallium/drivers/llvmpipe/lp_setup.c|  6 +-
  5 files changed, 63 insertions(+), 69 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.h 
b/src/gallium/auxiliary/draw/draw_llvm.h
index 9565fc6..a1983e1 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -472,6 +472,8 @@ struct draw_llvm {

 struct draw_gs_llvm_variant_list_item gs_variants_list;
 int nr_gs_variants;
+
+   float fake_const_buf[4];


Couldn't we make fake_const_buf a mere local static const array instead? 
It would save memory.



  };


diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c 
b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 0dfafdc..03257d8 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -273,28 +273,35 @@ llvm_middle_end_bind_parameters(struct draw_pt_middle_end 
*middle)
  {
 struct llvm_middle_end *fpme = llvm_middle_end(middle);
 struct draw_context *draw = fpme->draw;
+   struct draw_llvm *llvm = fpme->llvm;
 unsigned i;

-   for (i = 0; i < Elements(fpme->llvm->jit_context.vs_constants); ++i) {
+   for (i = 0; i < Elements(llvm->jit_context.vs_constants); ++i) {
int num_consts =
   draw->pt.user.vs_constants_size[i] / (sizeof(float) * 4);
-  fpme->llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i];
-  fpme->llvm->jit_context.num_vs_constants[i] = num_consts;
+  llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i];
+  llvm->jit_context.num_vs_constants[i] = num_consts;
+  if (num_consts == 0) {
+ llvm->jit_context.vs_constants[i] = llvm->fake_const_buf;
+  }
 }
-   for (i = 0; i < Elements(fpme->llvm->gs_jit_context.constants); ++i) {
+   for (i = 0; i < Elements(llvm->gs_jit_context.constants); ++i) {
int num_consts =
   draw->pt.user.gs_constants_size[i] / (sizeof(float) * 4);
-  fpme->llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i];
-  fpme->llvm->gs_jit_context.num_constants[i] = num_consts;
+  llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i];
+  llvm->gs_jit_context.num_constants[i] = num_consts;
+  if (num_consts == 0) {
+ llvm->gs_jit_context.constants[i] = llvm->fake_const_buf;
+  }
 }

-   fpme->llvm->jit_context.planes =
+   llvm->jit_context.planes =
(float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];
-   fpme->llvm->gs_jit_context.planes =
+   llvm->gs_jit_context.planes =
(float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];

-   fpme->llvm->jit_context.viewports = draw->viewports;
-   fpme->llvm->gs_jit_context.viewports = draw->viewports;
+   llvm->jit_context.viewports = draw->viewports;
+   llvm->gs_jit_context.viewports = draw->viewports;
  }


diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 17b68ff..5aa2846 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -944,20 +944,39 @@ gather_outputs(struct lp_build_tgsi_soa_context * bld)
   * with a little work.
   */
  static LLVMValueRef
-build_gather(struct lp_build_context *bld,
+build_gather(struct lp_build_tgsi_context *

Re: [Mesa-dev] [PATCH] gallium/ttn: add support for temp arrays

On Wed, Apr 8, 2015 at 11:14 AM, Eric Anholt  wrote:
> Rob Clark  writes:
>
>> From: Rob Clark 
>>
>> Since the rest of NIR really would rather have these as variables rather
>> than registers, create a nir_variable per array.  But rather than
>> completely re-arrange ttn to be variable based rather than register
>> based, keep the registers.  In the cases where there is a matching var
>> for the reg, ttn_emit_instruction will append the appropriate intrinsic
>> to get things back from the shadow reg into the variable.
>>
>> NOTE: this doesn't quite handle TEMP[ADDR[]] when the DCL doesn't give
>> an array id.  But those just kinda suck, and should really go away.
>> AFAICT we don't get those from glsl.  Might be an issue for some other
>> state tracker.
>>
>> v2: rework to use load_var/store_var with deref chains
>>
>> Signed-off-by: Rob Clark 
>> ---
>>  src/gallium/auxiliary/nir/tgsi_to_nir.c | 122 
>> +++-
>>  1 file changed, 103 insertions(+), 19 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c 
>> b/src/gallium/auxiliary/nir/tgsi_to_nir.c
>> index da935a4..f4c0bad 100644
>> --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
>> +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
>> @@ -44,6 +44,7 @@
>>  struct ttn_reg_info {
>> /** nir register containing this TGSI index. */
>> nir_register *reg;
>> +   nir_variable *var;
>> /** Offset (in vec4s) from the start of var for this TGSI index. */
>> int offset;
>>  };
>> @@ -121,22 +122,29 @@ ttn_emit_declaration(struct ttn_compile *c)
>>
>> if (file == TGSI_FILE_TEMPORARY) {
>>nir_register *reg;
>> -  if (c->scan->indirect_files & (1 << file)) {
>> +  nir_variable *var = NULL;
>> +
>> +  if (decl->Declaration.Array) {
>> + /* for arrays, the register created just serves as a
>> +  * shadow register.  We append intrinsic_store_global
>> +  * after the tgsi instruction is translated to move
>> +  * back from the shadow register to the variable
>> +  */
>> + var = rzalloc(b->shader, nir_variable);
>> +
>> + var->type = glsl_array_type(glsl_vec4_type(), array_size);
>> + var->data.mode = nir_var_global;
>> + var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID);
>> +
>> + exec_list_push_tail(&b->shader->globals, &var->node);
>> +  }
>> +
>> +  for (i = 0; i < array_size; i++) {
>>   reg = nir_local_reg_create(b->impl);
>>   reg->num_components = 4;
>> - reg->num_array_elems = array_size;
>> -
>> - for (i = 0; i < array_size; i++) {
>> -c->temp_regs[decl->Range.First + i].reg = reg;
>> -c->temp_regs[decl->Range.First + i].offset = i;
>> - }
>> -  } else {
>> - for (i = 0; i < array_size; i++) {
>> -reg = nir_local_reg_create(b->impl);
>> -reg->num_components = 4;
>> -c->temp_regs[decl->Range.First + i].reg = reg;
>> -c->temp_regs[decl->Range.First + i].offset = 0;
>> - }
>> + c->temp_regs[decl->Range.First + i].reg = reg;
>> + c->temp_regs[decl->Range.First + i].var = var;
>> + c->temp_regs[decl->Range.First + i].offset = i;
>
> Continuing to use array_size here doesn't make any sense to me, since
> you're not handling variable array indices when generating stores into
> the array.  So all you want is a single vec4 reg available so that you
> have something that our ALU op generation can do writemasked stores
> into, and you're picking an arbitrary one of them in ttn_get_dest().
>
> I think this would make a ton more sense if ttn_get_dest() just returned
> a new vec4 local reg for the temporary, instead of having this
> sort-of-shadow thing.
>

so the shadow registers did make things like:

DCL TEMP[0..2], ARRAY(1), LOCAL

  1: MOV TEMP[1].x, IN[1].
  2: MOV TEMP[1].yz, IN[2].yxyy

much easier to deal with.. I'm still thinking about how to handle that
w/ the create-new-temp-register-each-time approach.. but yeah, doesn't
work as well if you have indirect dst.

BR,
-R




>>}
>> } else if (file == TGSI_FILE_ADDRESS) {
>>c->addr_reg = nir_local_reg_create(b->impl);
>> @@ -245,6 +253,32 @@ ttn_emit_immediate(struct ttn_compile *c)
>>  static nir_src *
>>  ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register 
>> *indirect);
>>
>> +/* generate either a constant or indirect deref chain for accessing an
>> + * array variable.
>> + */
>> +static nir_deref_var *
>> +ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset,
>> +struct tgsi_ind_register *indirect)
>> +{
>> +   nir_builder *b = &c->build;
>> +   nir_deref_var *deref = nir_deref_var_create(b->shader, var);
>> +   nir_deref_array *arr = nir_deref_array_create(b->shader);
>> +
>> +   arr->base_offset = offset;
>> +   arr->deref.type = glsl_get_array_element(var->type);
>> +
>> +   if (indirect) {
>> +

Re: [Mesa-dev] [PATCH 10/12] nir: Store num_direct_uniforms in the nir_shader.

On 04/08/2015 12:06 AM, Kenneth Graunke wrote:
> Storing this here is pretty sketchy - I don't know if any driver other
> than i965 will want to use it.  But this will make it a lot easier to
> generate NIR code at link time.  We'll probably rework it anyway.
> 
> Signed-off-by: Kenneth Graunke 
> ---
>  src/glsl/nir/nir.h   | 3 +++
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 5 +++--
>  2 files changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index e844e4d..7d11996 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -1429,6 +1429,9 @@ typedef struct nir_shader {
>  * access plus one
>  */
> unsigned num_inputs, num_uniforms, num_outputs;
> +
> +   /** the number of uniforms that are only accessed directly */
> +   unsigned num_direct_uniforms;
>  } nir_shader;
>  
>  #define nir_foreach_overload(shader, overload)\
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 145a447..034b79a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -120,7 +120,7 @@ fs_visitor::emit_nir_code()
>  
> if (shader_prog) {
>nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms,
> -   &num_direct_uniforms,
> +   &nir->num_direct_uniforms,
> &nir->num_uniforms);

Why not just have nir_assign_var_locations_scalar_direct_first modify
the nir_shader passed in?  That seems more concise.

> } else {
>/* ARB programs generally create a giant array of "uniform" data, and 
> allow
> @@ -128,7 +128,7 @@ fs_visitor::emit_nir_code()
> * analysis, it's all or nothing.  num_direct_uniforms is only useful 
> when
> * we have some direct and some indirect access; it doesn't matter 
> here.
> */
> -  num_direct_uniforms = 0;
> +  nir->num_direct_uniforms = 0;
> }
> nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs);
> nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs);
> @@ -343,6 +343,7 @@ void
>  fs_visitor::nir_setup_uniforms(nir_shader *shader)
>  {
> uniforms = shader->num_uniforms;
> +   num_direct_uniforms = shader->num_direct_uniforms;
>  
> /* We split the uniform register file in half.  The first half is
>  * entirely direct uniforms.  The second half is indirect.
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/12] i965: Create NIR during LinkShader() and ProgramStringNotify().

On 04/08/2015 02:25 AM, Martin Peres wrote:
> On 08/04/15 10:06, Kenneth Graunke wrote:
>> Previously, we translated into NIR and did all the optimizations and
>> lowering as part of running fs_visitor.  This meant that we did all of
>> that work twice for fragment shaders - once for SIMD8, and again for
>> SIMD16.  We also had to redo it every time we hit a state based
>> recompile.
>>
>> We now generate NIR once at link time.  ARB programs don't have linking,
>> so we instead generate it at ProgramStringNotify time.
>>
>> Mesa's fixed function vertex program handling doesn't bother to inform
>> the driver about new programs at all (which is rather mean), so we
>> generate NIR at the last minute, if it hasn't happened already.
>>
>> shader-db runs ~9.4% faster on my i7-5600U, with a release build.
> 
> Nice speed improvement but wouldn't it affect negatively programs using
> SSO to recombine shaders at run time?

Hm... that's a fair question.  Does NIR do any cross-stage optimization?

>> Signed-off-by: Kenneth Graunke 
>> ---
>>   src/mesa/drivers/dri/i965/Makefile.sources |   1 +
>>   src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 174
>> +--
>>   src/mesa/drivers/dri/i965/brw_nir.c| 213
>> +
>>   src/mesa/drivers/dri/i965/brw_nir.h|   6 +
>>   src/mesa/drivers/dri/i965/brw_program.c|   7 +
>>   src/mesa/drivers/dri/i965/brw_shader.cpp   |   6 +
>>   src/mesa/drivers/dri/i965/brw_vec4.cpp |  17 ++-
>>   src/mesa/main/mtypes.h |   2 +
>>   src/mesa/program/program.c |   5 +
>>   9 files changed, 255 insertions(+), 176 deletions(-)
>>   create mode 100644 src/mesa/drivers/dri/i965/brw_nir.c
>>
>> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources
>> b/src/mesa/drivers/dri/i965/Makefile.sources
>> index 498d5a7..6d4659f 100644
>> --- a/src/mesa/drivers/dri/i965/Makefile.sources
>> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
>> @@ -77,6 +77,7 @@ i965_FILES = \
>>   brw_misc_state.c \
>>   brw_multisample_state.h \
>>   brw_nir.h \
>> +brw_nir.c \
>>   brw_nir_analyze_boolean_resolves.c \
>>   brw_object_purgeable.c \
>>   brw_packed_float.c \
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> index 034b79a..ccffd5d 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>> @@ -28,175 +28,10 @@
>>   #include "brw_fs.h"
>>   #include "brw_nir.h"
>>   -static void
>> -nir_optimize(nir_shader *nir)
>> -{
>> -   bool progress;
>> -   do {
>> -  progress = false;
>> -  nir_lower_vars_to_ssa(nir);
>> -  nir_validate_shader(nir);
>> -  nir_lower_alu_to_scalar(nir);
>> -  nir_validate_shader(nir);
>> -  progress |= nir_copy_prop(nir);
>> -  nir_validate_shader(nir);
>> -  nir_lower_phis_to_scalar(nir);
>> -  nir_validate_shader(nir);
>> -  progress |= nir_copy_prop(nir);
>> -  nir_validate_shader(nir);
>> -  progress |= nir_opt_dce(nir);
>> -  nir_validate_shader(nir);
>> -  progress |= nir_opt_cse(nir);
>> -  nir_validate_shader(nir);
>> -  progress |= nir_opt_peephole_select(nir);
>> -  nir_validate_shader(nir);
>> -  progress |= nir_opt_algebraic(nir);
>> -  nir_validate_shader(nir);
>> -  progress |= nir_opt_constant_folding(nir);
>> -  nir_validate_shader(nir);
>> -  progress |= nir_opt_remove_phis(nir);
>> -  nir_validate_shader(nir);
>> -   } while (progress);
>> -}
>> -
>> -static bool
>> -count_nir_instrs_in_block(nir_block *block, void *state)
>> -{
>> -   int *count = (int *) state;
>> -   nir_foreach_instr(block, instr) {
>> -  *count = *count + 1;
>> -   }
>> -   return true;
>> -}
>> -
>> -static int
>> -count_nir_instrs(nir_shader *nir)
>> -{
>> -   int count = 0;
>> -   nir_foreach_overload(nir, overload) {
>> -  if (!overload->impl)
>> - continue;
>> -  nir_foreach_block(overload->impl, count_nir_instrs_in_block,
>> &count);
>> -   }
>> -   return count;
>> -}
>> -
>>   void
>>   fs_visitor::emit_nir_code()
>>   {
>> -   const nir_shader_compiler_options *options =
>> -  ctx->Const.ShaderCompilerOptions[stage].NirOptions;
>> -
>> -   nir_shader *nir;
>> -   /* First, lower the GLSL IR or Mesa IR to NIR */
>> -   if (shader_prog) {
>> -  nir = glsl_to_nir(&shader->base, options);
>> -   } else {
>> -  nir = prog_to_nir(prog, options);
>> -  nir_convert_to_ssa(nir); /* turn registers into SSA */
>> -   }
>> -   nir_validate_shader(nir);
>> -
>> -   nir_lower_global_vars_to_local(nir);
>> -   nir_validate_shader(nir);
>> -
>> -   nir_lower_tex_projector(nir);
>> -   nir_validate_shader(nir);
>> -
>> -   nir_normalize_cubemap_coords(nir);
>> -   nir_validate_shader(nir);
>> -
>> -   nir_split_var_copies(nir);
>> -   nir_validate_shader(nir);
>> -
>> -   nir_optimize(nir);
>> -
>> -   /* Lower a bunch of stuff */
>> -   nir_lower_var_

Re: [Mesa-dev] [PATCH 00/12] i965: Generate NIR at link time

Patches 1 through 9 and 12 are

Reviewed-by: Ian Romanick 

The other 3 have some comments / discussion.

On 04/08/2015 12:06 AM, Kenneth Graunke wrote:
> Hello,
> 
> This series makes i965 generate NIR at link time (or ProgramStringNotify
> time for ARB programs), rather than on each FS/VS compile.  This means
> we only do it once, rather than for SIMD8 and again for SIMD16 programs.
> It also means we can avoid it when doing state based recompiles.
> 
> It speeds up shader-db on my Broadwell by about 9.4%.
> 
> It also adds INTEL_DEBUG=ann support, now that we keep a persistent copy
> of the NIR program around for the annotations to refer to.
> 
> Available in the 'nir-link' branch of ~kwg/mesa.
> 
> --Ken
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] scons: add target gallium-osmesa

2015-04-08 Thread Jose Fonseca

Besides the issue Emil mentioned, one minor request: let's call the
target just "osmesa", as we don't plan to have any other "osmesa" target.


Jose

On 08/04/15 18:18, Emil Velikov wrote:

Hi Olivier

Thanks for the patch !

Adding Jose to the Cc list as I believe he'll have some input on the topic.

On 3 April 2015 at 15:06,   wrote:

From: Olivier Pena 

---
  src/gallium/SConscript   |  5 
  src/gallium/state_trackers/osmesa/SConscript | 25 +
  src/gallium/state_trackers/osmesa/osmesa.def | 16 +++
  src/gallium/targets/osmesa/SConscript| 41 
  4 files changed, 87 insertions(+)
  create mode 100644 src/gallium/state_trackers/osmesa/SConscript
  create mode 100644 src/gallium/state_trackers/osmesa/osmesa.def
  create mode 100644 src/gallium/targets/osmesa/SConscript


Can you add the three new files into the EXTRA_DIST variable in the
relevant Makefile.am? This way one can build scons gallium-osmesa
from a release tarball :-)


diff --git a/src/gallium/SConscript b/src/gallium/SConscript
index 680ad92..eeb1c78 100644
--- a/src/gallium/SConscript
+++ b/src/gallium/SConscript
@@ -60,6 +60,11 @@ SConscript([
  ])

  if not env['embedded']:
+SConscript([
+'state_trackers/osmesa/SConscript',
+'targets/osmesa/SConscript',
+])
+
  if env['x11']:
  SConscript([
  'state_trackers/glx/xlib/SConscript',
diff --git a/src/gallium/state_trackers/osmesa/SConscript 
b/src/gallium/state_trackers/osmesa/SConscript
new file mode 100644
index 000..fa7c968
--- /dev/null
+++ b/src/gallium/state_trackers/osmesa/SConscript
@@ -0,0 +1,25 @@
+import os
+
+Import('*')
+
+env = env.Clone()
+
+env.Append(CPPPATH = [
+'#src/mapi',
+'#src/mesa',
+'.',
+])
+
+env.AppendUnique(CPPDEFINES = [
+'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers
+'WIN32_LEAN_AND_MEAN', # 
http://msdn2.microsoft.com/en-us/library/6dwk3a1z.aspx
+])
+if not env['gles']:
+# prevent _glapi_* from being declared __declspec(dllimport)
+env.Append(CPPDEFINES = ['_GLAPI_NO_EXPORTS'])
+

Shouldn't these be used when building for windows only ?


+st_osmesa = env.ConvenienceLibrary(
+target ='st_osmesa',
+source = env.ParseSourceList('Makefile.sources', 'C_SOURCES'),
+)
+Export('st_osmesa')
diff --git a/src/gallium/state_trackers/osmesa/osmesa.def 
b/src/gallium/state_trackers/osmesa/osmesa.def
new file mode 100644
index 000..e2a31ab
--- /dev/null
+++ b/src/gallium/state_trackers/osmesa/osmesa.def

Can we move this file next to its only user - i.e. into targets/osmesa/ ?


@@ -0,0 +1,16 @@
+;DESCRIPTION 'Mesa OSMesa lib for Win32'
+VERSION 4.1
+
+EXPORTS
+   OSMesaCreateContext
+   OSMesaCreateContextExt
+   OSMesaDestroyContext
+   OSMesaMakeCurrent
+   OSMesaGetCurrentContext
+   OSMesaPixelStore
+   OSMesaGetIntegerv
+   OSMesaGetDepthBuffer
+   OSMesaGetColorBuffer
+   OSMesaGetProcAddress
+   OSMesaColorClamp
+   OSMesaPostprocess
diff --git a/src/gallium/targets/osmesa/SConscript 
b/src/gallium/targets/osmesa/SConscript
new file mode 100644
index 000..2c936cf
--- /dev/null
+++ b/src/gallium/targets/osmesa/SConscript
@@ -0,0 +1,41 @@
+Import('*')
+
+env = env.Clone()
+
+env.Prepend(CPPPATH = [
+'#src/mapi',
+'#src/mesa',
+#Dir('../../../mapi'), # src/mapi build path for python-generated GL API 
files/headers
+])
+
+sources = [
+'target.c',
+]
+sources += ['#src/gallium/state_trackers/osmesa/osmesa.def']
+

Afaict this should be included only if the target is Windows.


+drivers = []
+
+if env['llvm']:
+env.Append(CPPDEFINES = 'GALLIUM_LLVMPIPE')
+env.Append(CPPDEFINES = 'GALLIUM_TRACE')
+drivers += [llvmpipe]
+else:
+env.Append(CPPDEFINES = 'GALLIUM_SOFTPIPE')
+env.Append(CPPDEFINES = 'GALLIUM_TRACE')
+drivers += [softpipe]
+

One should include softpipe unconditionally as we can switch between
llvmpipe and softpipe at runtime.


+if env['platform'] == 'windows':
+env.AppendUnique(CPPDEFINES = [
+'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers
+])
+if not env['gles']:
+# prevent _glapi_* from being declared __declspec(dllimport)
+env.Append(CPPDEFINES = ['_GLAPI_NO_EXPORTS'])
+

Don't think you need this if block.


+gallium_osmesa = env.SharedLibrary(
+target ='osmesa',
+source = sources,
+LIBS = drivers + st_osmesa + ws_null + glapi + mesa + gallium + trace 
+ glsl + mesautil + env['LIBS'],

How about we move this before the SharedLibrary construct and use
env.Prepend(LIBS =...  like other places in mesa ?

Thanks
Emil



_

[Mesa-dev] [Bug 89823] [swrast] driver loads but complains then fails to work in Piglit which shows GLSL message

https://bugs.freedesktop.org/show_bug.cgi?id=89823

Dan Sebald  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |INVALID

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/12] i965: Create NIR during LinkShader() and ProgramStringNotify().

On 04/08/2015 01:46 PM, Jason Ekstrand wrote:
> On Wed, Apr 8, 2015 at 12:53 PM, Ian Romanick  wrote:
>> On 04/08/2015 02:25 AM, Martin Peres wrote:
>>> On 08/04/15 10:06, Kenneth Graunke wrote:
>>>> Previously, we translated into NIR and did all the optimizations and
>>>> lowering as part of running fs_visitor.  This meant that we did all of
>>>> that work twice for fragment shaders - once for SIMD8, and again for
>>>> SIMD16.  We also had to redo it every time we hit a state based
>>>> recompile.
>>>>
>>>> We now generate NIR once at link time.  ARB programs don't have linking,
>>>> so we instead generate it at ProgramStringNotify time.
>>>>
>>>> Mesa's fixed function vertex program handling doesn't bother to inform
>>>> the driver about new programs at all (which is rather mean), so we
>>>> generate NIR at the last minute, if it hasn't happened already.
>>>>
>>>> shader-db runs ~9.4% faster on my i7-5600U, with a release build.
>>>
>>> Nice speed improvement but wouldn't it affect negatively programs using
>>> SSO to recombine shaders at run time?
>>
>> Hm... that's a fair question.  Does NIR do any cross-stage optimization?
> 
> Not at the moment.  We probably should since NIR can probably
> dead-code things better.

Okay.  Then this shouldn't impact SSO.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 89963] lp_bld_debug.cpp:100:31: error: no matching function for call to ‘llvm::raw_ostream::raw_ostream()’

https://bugs.freedesktop.org/show_bug.cgi?id=89963

Bug ID: 89963
   Summary: lp_bld_debug.cpp:100:31: error: no matching function
for call to ‘llvm::raw_ostream::raw_ostream()’
   Product: Mesa
   Version: git
  Hardware: x86-64 (AMD64)
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: v...@freedesktop.org
QA Contact: mesa-dev@lists.freedesktop.org

mesa: a873b79fa5e3138196a3c1785f2a65308fa78286 (master 10.6.0-devel)

Build error with llvm-3.7.0svn.

  Compiling src/gallium/auxiliary/gallivm/lp_bld_debug.cpp ...
src/gallium/auxiliary/gallivm/lp_bld_debug.cpp: In constructor
‘raw_debug_ostream::raw_debug_ostream()’:
src/gallium/auxiliary/gallivm/lp_bld_debug.cpp:100:31: error: no matching
function for call to ‘llvm::raw_ostream::raw_ostream()’
raw_debug_ostream() : pos(0) { }
   ^
src/gallium/auxiliary/gallivm/lp_bld_debug.cpp:100:31: note: candidate is:
In file included from src/gallium/auxiliary/gallivm/lp_bld_debug.cpp:33:0:
include/llvm/Support/raw_ostream.h:95:12: note:
llvm::raw_ostream::raw_ostream(llvm::raw_ostream::StreamKind, bool)
   explicit raw_ostream(StreamKind Kind, bool unbuffered = false)
^
include/llvm/Support/raw_ostream.h:95:12: note:   candidate expects 2
arguments, 0 provided

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] gallium/ttn: minor cleanup

From: Rob Clark 

Extract tgsi_dst->Index into a local.. split out from 'gallium/ttn: add
support for temp arrays' for noise reduction..

Signed-off-by: Rob Clark 
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c 
b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index da935a4..fcccdad 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -340,17 +340,18 @@ ttn_get_dest(struct ttn_compile *c, struct 
tgsi_full_dst_register *tgsi_fdst)
 {
struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
nir_alu_dest dest;
+   unsigned index = tgsi_dst->Index;
 
memset(&dest, 0, sizeof(dest));
 
if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
-  dest.dest.reg.reg = c->temp_regs[tgsi_dst->Index].reg;
-  dest.dest.reg.base_offset = c->temp_regs[tgsi_dst->Index].offset;
+  dest.dest.reg.reg = c->temp_regs[index].reg;
+  dest.dest.reg.base_offset = c->temp_regs[index].offset;
} else if (tgsi_dst->File == TGSI_FILE_OUTPUT) {
-  dest.dest.reg.reg = c->output_regs[tgsi_dst->Index].reg;
-  dest.dest.reg.base_offset = c->output_regs[tgsi_dst->Index].offset;
+  dest.dest.reg.reg = c->output_regs[index].reg;
+  dest.dest.reg.base_offset = c->output_regs[index].offset;
} else if (tgsi_dst->File == TGSI_FILE_ADDRESS) {
-  assert(tgsi_dst->Index == 0);
+  assert(index == 0);
   dest.dest.reg.reg = c->addr_reg;
}
 
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/5] i965/vs: Add src_reg::negative_equals method

From: Ian Romanick 

This method is similar to the existing ::equals method.  Instead of
testing that two src_regs are equal to each other, it tests that one is
the negation of the other.

Signed-off-by: Ian Romanick 
---
 src/mesa/drivers/dri/i965/brw_ir_vec4.h |  1 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp  | 43 +
 2 files changed, 44 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h 
b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index d3bd64d..449795a 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -49,6 +49,7 @@ public:
src_reg(struct brw_reg reg);
 
bool equals(const src_reg &r) const;
+   bool negative_equals(const src_reg &r) const;
 
src_reg(class vec4_visitor *v, const struct glsl_type *type);
src_reg(class vec4_visitor *v, const struct glsl_type *type, int size);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index ef2fd40..d5286c2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -328,6 +328,49 @@ src_reg::equals(const src_reg &r) const
 }
 
 bool
+src_reg::negative_equals(const src_reg &r) const
+{
+   if (file != r.file)
+  return false;
+
+   if (file == IMM) {
+  if (!(reg == r.reg &&
+reg_offset == r.reg_offset &&
+type == r.type &&
+negate == r.negate &&
+abs == r.abs &&
+swizzle == r.swizzle &&
+!reladdr && !r.reladdr))
+ return false;
+
+  switch (fixed_hw_reg.type) {
+  case BRW_REGISTER_TYPE_F:
+ return memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
+   sizeof(fixed_hw_reg) - sizeof(fixed_hw_reg.dw1)) == 0 &&
+fixed_hw_reg.dw1.f == -r.fixed_hw_reg.dw1.f;
+
+  case BRW_REGISTER_TYPE_D:
+ return memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
+   sizeof(fixed_hw_reg) - sizeof(fixed_hw_reg.dw1)) == 0 &&
+fixed_hw_reg.dw1.d == -r.fixed_hw_reg.dw1.d;
+
+  default:
+ return false;
+  }
+   } else {
+  return reg == r.reg &&
+ reg_offset == r.reg_offset &&
+ type == r.type &&
+ negate != r.negate &&
+ abs == r.abs &&
+ swizzle == r.swizzle &&
+ !reladdr && !r.reladdr &&
+ memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
+sizeof(fixed_hw_reg)) == 0;
+   }
+}
+
+bool
 vec4_visitor::opt_vector_float()
 {
bool progress = false;
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 5/5] i965: Emit MUL with a negated src for neg(mul(...)).

From: Matt Turner 

Shader-db results:

GM45:
total instructions in shared programs: 4060151 -> 4059575 (-0.01%)
instructions in affected programs: 81478 -> 80902 (-0.71%)
helped:441
HURT:  4

GM45 NIR:
total instructions in shared programs: 4079065 -> 4078671 (-0.01%)
instructions in affected programs: 67196 -> 66802 (-0.59%)
helped:398
HURT:  4

Iron Lake:
total instructions in shared programs: 5477330 -> 5476586 (-0.01%)
instructions in affected programs: 92444 -> 91700 (-0.80%)
helped:472
HURT:  4

Iron Lake NIR:
total instructions in shared programs: 5675880 -> 5675486 (-0.01%)
instructions in affected programs: 67196 -> 66802 (-0.59%)
helped:398
HURT:  4

Sandy Bridge:
total instructions in shared programs: 7307710 -> 7305083 (-0.04%)
instructions in affected programs: 282903 -> 280276 (-0.93%)
helped:1506

Sandy Bridge NIR:
total instructions in shared programs: 7329198 -> 7328404 (-0.01%)
instructions in affected programs: 133777 -> 132983 (-0.59%)
helped:682

Ivy Bridge:
total instructions in shared programs: 6763747 -> 6763036 (-0.01%)
instructions in affected programs: 79009 -> 78298 (-0.90%)
helped:573

Ivy Bridge NIR:
total instructions in shared programs: 6765185 -> 6764738 (-0.01%)
instructions in affected programs: 61743 -> 61296 (-0.72%)
helped:445

Haswell:
total instructions in shared programs: 6223429 -> 6222718 (-0.01%)
instructions in affected programs: 77427 -> 76716 (-0.92%)
helped:573

Haswell NIR:
total instructions in shared programs: 6180970 -> 6180523 (-0.01%)
instructions in affected programs: 61743 -> 61296 (-0.72%)
helped:445

Broadwell:
total instructions in shared programs: 7284540 -> 7284103 (-0.01%)
instructions in affected programs: 31526 -> 31089 (-1.39%)
helped:251

Broadwell NIR:
total instructions in shared programs: 7500487 -> 7500487 (0.00%)
instructions in affected programs: 0 -> 0

Reviewed-by: Ben Widawsky 
Reviewed-by: Ian Romanick 
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 15 +++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 20 
 2 files changed, 35 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 3622e65..574c266 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -700,6 +700,21 @@ fs_visitor::visit(ir_expression *ir)
 
/* Deal with the real oddball stuff first */
switch (ir->operation) {
+   case ir_unop_neg: {
+  if (!ir->type->is_float())
+ break;
+
+  ir_expression *mul = ir->operands[0]->as_expression();
+  if (mul && mul->operation == ir_binop_mul) {
+ mul->accept(this);
+ fs_inst *mul_inst = (fs_inst *) this->instructions.get_tail();
+ assert(mul_inst->opcode == BRW_OPCODE_MUL);
+
+ mul_inst->src[1].negate = true;
+ return;
+  }
+  break;
+   }
case ir_binop_add:
   if (brw->gen <= 5 && try_emit_line(ir))
  return;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index ffbe04d..b9d6087 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1303,6 +1303,26 @@ vec4_visitor::visit(ir_expression *ir)
src_reg op[ARRAY_SIZE(ir->operands)];
vec4_instruction *inst;
 
+   switch (ir->operation) {
+   case ir_unop_neg: {
+  if (!ir->type->is_float())
+ break;
+
+  ir_expression *mul = ir->operands[0]->as_expression();
+  if (mul && mul->operation == ir_binop_mul) {
+ mul->accept(this);
+ vec4_instruction *mul_inst = (vec4_instruction *) 
this->instructions.get_tail();
+ assert(mul_inst->opcode == BRW_OPCODE_MUL);
+
+ mul_inst->src[1].negate = true;
+ return;
+  }
+  break;
+   }
+   default:
+  break;
+   }
+
if (ir->operation == ir_binop_add) {
   if (try_emit_mad(ir))
 return;
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/5] glsl/cse: Use ir_rvalue_enter_visitor instead of ir_rvalue_visitor

From: Ian Romanick 

ir_rvalue_visitor visits each rvalue on exit.  When visiting a large
expression, the leaf expressions will be visited and eliminated first.
Once one leaf expression was replaced, it would no longer match a
potentially much larger tree.  This means that code like:

x = a + (b * c);
y = -(a + (b * c));

would effectively be replaced by

tmp = b * c;
x = a + tmp;
y = -(a + tmp);

As a result both opportunities for generating a MAD would be lost, and
we would generate worse code.

Using ir_rvalue_enter_visitor means that larger expression trees will be
checked first, and we have the potential to eliminate much larger
expressions.

I believe that opt_cse.cpp predates the existence of
ir_rvalue_enter_visitor.

Shader-db results:

GM45:
total instructions in shared programs: 4063165 -> 4061744 (-0.03%)
instructions in affected programs: 21664 -> 20243 (-6.56%)
helped:259

GM45 NIR:
total instructions in shared programs: 4082044 -> 4080646 (-0.03%)
instructions in affected programs: 21091 -> 19693 (-6.63%)
helped:255
HURT:  1

Iron Lake:
total instructions in shared programs: 5480334 -> 5478897 (-0.03%)
instructions in affected programs: 25798 -> 24361 (-5.57%)
helped:273
HURT:  1

Iron Lake NIR:
total instructions in shared programs: 5678776 -> 5677395 (-0.02%)
instructions in affected programs: 21744 -> 20363 (-6.35%)
helped:263
HURT:  2

Sandy Bridge:
total instructions in shared programs: 7318903 -> 7316983 (-0.03%)
instructions in affected programs: 37937 -> 36017 (-5.06%)
helped:398
HURT:  26

Sandy Bridge NIR:
total instructions in shared programs: 7329995 -> 7328069 (-0.03%)
instructions in affected programs: 32487 -> 30561 (-5.93%)
helped:384
HURT:  6

Ivy Bridge:
total instructions in shared programs: 6766579 -> 6765409 (-0.02%)
instructions in affected programs: 18110 -> 16940 (-6.46%)
helped:288
HURT:  16
GAINED:1

Ivy Bridge NIR:
total instructions in shared programs: 6769314 -> 6768159 (-0.02%)
instructions in affected programs: 11063 -> 9908 (-10.44%)
helped:264
HURT:  6

Haswell:
total instructions in shared programs: 6226294 -> 6225102 (-0.02%)
instructions in affected programs: 17555 -> 16363 (-6.79%)
helped:297
HURT:  10
GAINED:1

Haswell NIR:
total instructions in shared programs: 6183693 -> 6182538 (-0.02%)
instructions in affected programs: 10990 -> 9835 (-10.51%)
helped:264
HURT:  6

Broadwell:
total instructions in shared programs: 7285895 -> 7284537 (-0.02%)
instructions in affected programs: 31977 -> 30619 (-4.25%)
helped:357
HURT:  6

Broadwell NIR:
total instructions in shared programs: 7501711 -> 7501544 (-0.00%)
instructions in affected programs: 7174 -> 7007 (-2.33%)
helped:87
HURT:  2

Signed-off-by: Ian Romanick 
---
 src/glsl/opt_cse.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/opt_cse.cpp b/src/glsl/opt_cse.cpp
index 4b8e9a0..425eebc 100644
--- a/src/glsl/opt_cse.cpp
+++ b/src/glsl/opt_cse.cpp
@@ -99,7 +99,7 @@ public:
ir_variable *var;
 };
 
-class cse_visitor : public ir_rvalue_visitor {
+class cse_visitor : public ir_rvalue_enter_visitor {
 public:
cse_visitor(exec_list *validate_instructions)
   : validate_instructions(validate_instructions)
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/5] i965/vs: Allow CSE to handle MULs with negated arguments.

From: Ian Romanick 

This is similar to commit (47c4b38: i965/fs: Allow CSE to handle MULs
with negated arguments.), but it uses a slightly different approach.

Shader-db results:

GM45:
total instructions in shared programs: 4060813 -> 4060151 (-0.02%)
instructions in affected programs: 13448 -> 12786 (-4.92%)
helped:62
HURT:  9

All other results, except Broadwell, were identical to GM45 w/o NIR.
Since NIR isn't used for VEC4, this makes sense.

Broadwell:
total instructions in shared programs: 7284561 -> 7284540 (-0.00%)
instructions in affected programs: 1272 -> 1251 (-1.65%)
helped:12

Broadwell NIR:
total instructions in shared programs: 7500487 -> 7500487 (0.00%)
instructions in affected programs: 0 -> 0

Signed-off-by: Ian Romanick 
---
 src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 32 +-
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index 100e511..49b50a7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -90,15 +90,34 @@ is_expression(const vec4_instruction *const inst)
 }
 
 static bool
-operands_match(const vec4_instruction *a, const vec4_instruction *b)
+operands_match(const vec4_instruction *a, const vec4_instruction *b,
+   bool *negate)
 {
const src_reg *xs = a->src;
const src_reg *ys = b->src;
 
+   *negate = false;
+
if (a->opcode == BRW_OPCODE_MAD) {
   return xs[0].equals(ys[0]) &&
  ((xs[1].equals(ys[1]) && xs[2].equals(ys[2])) ||
   (xs[2].equals(ys[1]) && xs[1].equals(ys[2])));
+   } else if (a->opcode == BRW_OPCODE_MUL) {
+  if ((xs[0].equals(ys[0]) && xs[1].equals(ys[1])) ||
+  (xs[1].equals(ys[0]) && xs[0].equals(ys[1])) ||
+  (xs[0].negative_equals(ys[0]) && xs[1].negative_equals(ys[1])) ||
+  (xs[1].negative_equals(ys[0]) && xs[0].negative_equals(ys[1])))
+ return true;
+
+  if ((xs[0].equals(ys[0]) && xs[1].negative_equals(ys[1])) ||
+  (xs[1].equals(ys[0]) && xs[0].negative_equals(ys[1])) ||
+  (xs[0].negative_equals(ys[0]) && xs[1].equals(ys[1])) ||
+  (xs[1].negative_equals(ys[0]) && xs[0].equals(ys[1]))) {
+ *negate = true;
+ return true;
+  }
+
+  return false;
} else if (!a->is_commutative()) {
   return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]);
} else {
@@ -108,7 +127,7 @@ operands_match(const vec4_instruction *a, const 
vec4_instruction *b)
 }
 
 static bool
-instructions_match(vec4_instruction *a, vec4_instruction *b)
+instructions_match(vec4_instruction *a, vec4_instruction *b, bool *negate)
 {
return a->opcode == b->opcode &&
   a->saturate == b->saturate &&
@@ -117,7 +136,7 @@ instructions_match(vec4_instruction *a, vec4_instruction *b)
   a->dst.writemask == b->dst.writemask &&
   a->force_writemask_all == b->force_writemask_all &&
   a->regs_written == b->regs_written &&
-  operands_match(a, b);
+  operands_match(a, b, negate);
 }
 
 bool
@@ -135,11 +154,12 @@ vec4_visitor::opt_cse_local(bblock_t *block)
   (inst->dst.file != HW_REG || inst->dst.is_null()))
   {
  bool found = false;
+ bool negate;
 
  foreach_in_list_use_after(aeb_entry, entry, &aeb) {
 /* Match current instruction's expression against those in AEB. */
 if (!(entry->generator->dst.is_null() && !inst->dst.is_null()) &&
-instructions_match(inst, entry->generator)) {
+instructions_match(inst, entry->generator, &negate)) {
found = true;
progress = true;
break;
@@ -186,6 +206,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
   vec4_instruction *copy = MOV(offset(inst->dst, i),
offset(entry->tmp, i));
   copy->force_writemask_all = inst->force_writemask_all;
+  copy->src[0].negate = negate;
   inst->insert_before(block, copy);
}
 }
@@ -206,9 +227,10 @@ vec4_visitor::opt_cse_local(bblock_t *block)
   * the flag register if we just wrote it.
   */
  if (inst->writes_flag()) {
+bool negate; /* dummy */
 if (entry->generator->reads_flag() ||
 (entry->generator->writes_flag() &&
- !instructions_match(inst, entry->generator))) {
+ !instructions_match(inst, entry->generator, &negate))) {
entry->remove();
ralloc_free(entry);
continue;
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] gallivm: don't use control flow when doing indirect constant buffer lookups

2015-04-08 Thread Roland Scheidegger

Am 08.04.2015 um 21:13 schrieb Jose Fonseca:
> Series looks good to me.
> 
> Just a few suggestions inline.
> 
> 
> On 04/04/15 15:50, srol...@vmware.com wrote:
>> From: Roland Scheidegger 
>>
>> llvm goes crazy when doing that, using way more memory and time,
>> though there's
>> probably more to it - this points to a very much similar issue as
>> fixed in
>> 8a9f5ecdb116d0449d63f7b94efbfa8b205d826f. In any case I've seen a quite
>> plain looking vertex shader with just ~50 simple tgsi instructions
>> (but with a
>> dozen or so such indirect constant buffer lookups) go from a terribly
>> high
>> ~440ms compile time (consuming 25MB of memory in the process) down to
>> a still
>> awful ~230ms and 13MB with this fix (with llvm 3.3), so there's still
>> obvious
>> improvements possible (but I have no clue why it's so slow...).
>> The resulting shader is most likely also faster (certainly seemed so
>> though
>> I don't have any hard numbers as it may have been influenced by
>> compile times)
>> since generally fetching constants outside the buffer range is most
>> likely an
>> app error (that is we expect all indices to be valid).
>> It is possible this fixes some mysterious vertex shader slowdowns
>> we've seen
>> ever since we are conforming to newer apis at least partially (the
>> main draw
>> loop also has similar looking conditionals which we probably could do
>> without -
>> if not for the fetch at least for the additional elts condition.)
>> ---
>>   src/gallium/auxiliary/draw/draw_llvm.h |  2 +
>>   .../draw/draw_pt_fetch_shade_pipeline_llvm.c   | 27 +++---
>>   src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c| 95
>> +-
>>   src/gallium/drivers/llvmpipe/lp_scene.h|  2 +
>>   src/gallium/drivers/llvmpipe/lp_setup.c|  6 +-
>>   5 files changed, 63 insertions(+), 69 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/draw/draw_llvm.h
>> b/src/gallium/auxiliary/draw/draw_llvm.h
>> index 9565fc6..a1983e1 100644
>> --- a/src/gallium/auxiliary/draw/draw_llvm.h
>> +++ b/src/gallium/auxiliary/draw/draw_llvm.h
>> @@ -472,6 +472,8 @@ struct draw_llvm {
>>
>>  struct draw_gs_llvm_variant_list_item gs_variants_list;
>>  int nr_gs_variants;
>> +
>> +   float fake_const_buf[4];
> 
> Couldn't we make fake_const_buf a mere local static const array instead?
> It would save memory.
Ah right can just declare that in llvm_middle_end_prepare(). Putting it
in the scene data seemed convenient for the fs case (there's only one
scene per context anyway but I guess could do the same thing there too).

> 
>>   };
>>
>>
>> diff --git
>> a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
>> b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
>> index 0dfafdc..03257d8 100644
>> --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
>> +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
>> @@ -273,28 +273,35 @@ llvm_middle_end_bind_parameters(struct
>> draw_pt_middle_end *middle)
>>   {
>>  struct llvm_middle_end *fpme = llvm_middle_end(middle);
>>  struct draw_context *draw = fpme->draw;
>> +   struct draw_llvm *llvm = fpme->llvm;
>>  unsigned i;
>>
>> -   for (i = 0; i < Elements(fpme->llvm->jit_context.vs_constants);
>> ++i) {
>> +   for (i = 0; i < Elements(llvm->jit_context.vs_constants); ++i) {
>> int num_consts =
>>draw->pt.user.vs_constants_size[i] / (sizeof(float) * 4);
>> -  fpme->llvm->jit_context.vs_constants[i] =
>> draw->pt.user.vs_constants[i];
>> -  fpme->llvm->jit_context.num_vs_constants[i] = num_consts;
>> +  llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i];
>> +  llvm->jit_context.num_vs_constants[i] = num_consts;
>> +  if (num_consts == 0) {
>> + llvm->jit_context.vs_constants[i] = llvm->fake_const_buf;
>> +  }
>>  }
>> -   for (i = 0; i < Elements(fpme->llvm->gs_jit_context.constants);
>> ++i) {
>> +   for (i = 0; i < Elements(llvm->gs_jit_context.constants); ++i) {
>> int num_consts =
>>draw->pt.user.gs_constants_size[i] / (sizeof(float) * 4);
>> -  fpme->llvm->gs_jit_context.constants[i] =
>> draw->pt.user.gs_constants[i];
>> -  fpme->llvm->gs_jit_context.num_constants[i] = num_consts;
>> +  llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i];
>> +  llvm->gs_jit_context.num_constants[i] = num_consts;
>> +  if (num_consts == 0) {
>> + llvm->gs_jit_context.constants[i] = llvm->fake_const_buf;
>> +  }
>>  }
>>
>> -   fpme->llvm->jit_context.planes =
>> +   llvm->jit_context.planes =
>> (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];
>> -   fpme->llvm->gs_jit_context.planes =
>> +   llvm->gs_jit_context.planes =
>> (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];
>>
>> -   fpme->llvm->jit_context.viewports = draw->viewports;
>> -   fpme->llvm->gs_jit_context.viewports = draw->viewports;

[Mesa-dev] [Bug 89960] [softpipe] piglit copy-pixels regression

https://bugs.freedesktop.org/show_bug.cgi?id=89960

Bug ID: 89960
   Summary: [softpipe] piglit copy-pixels regression
   Product: Mesa
   Version: git
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Keywords: bisected, regression
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: v...@freedesktop.org
QA Contact: mesa-dev@lists.freedesktop.org
CC: airl...@freedesktop.org

mesa: a873b79fa5e3138196a3c1785f2a65308fa78286 (master 10.6.0-devel)

$ ./bin/copy-pixels -auto
Probe stencil at (12, 12)
  Expected: 2
  Observed: 0
PIGLIT: {"result": "fail" }

61393bdcdc3b63624bf6e9730444f5e9deeedfc8 is the first bad commit
commit 61393bdcdc3b63624bf6e9730444f5e9deeedfc8
Author: Dave Airlie 
Date:   Tue Apr 7 09:52:41 2015 +1000

u_tile: fix stencil texturing tests under softpipe

arb_stencil_texturing-draw failed under softpipe because we got a float
back from the texturing function, and then tried to U2F it, stencil
texturing returns ints, so we should fix the tiling to retrieve
the stencil values as integers not floats.

Signed-off-by: Dave Airlie 

:040000 040000 aad741af761764f93de05cb9a202b41a56c96645
55f28cb0eaf6c88a7f0e52e7baa67d221779a2ce M src
bisect run success

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/3] gallium/ttn: add support for temp arrays

On Wed, Apr 8, 2015 at 6:34 PM, Rob Clark  wrote:
> From: Rob Clark 
>
> Since the rest of NIR really would rather have these as variables rather
> than registers, create a nir_variable per array.  But rather than
> completely re-arrange ttn to be variable based rather than register
> based, keep the registers.  In the cases where there is a matching var
> for the reg, ttn_emit_instruction will append the appropriate intrinsic
> to get things back from the shadow reg into the variable.

hmm, bleh, I probably should have updated the commit msg before hitting send..

this version drops the pre-declared registers for array elements, and
instead creates temp regs on demand as Eric suggested (and mentioned
in the v3 note below)

BR,
-R


> NOTE: this doesn't quite handle TEMP[ADDR[]] when the DCL doesn't give
> an array id.  But those just kinda suck, and should really go away.
> AFAICT we don't get those from glsl.  Might be an issue for some other
> state tracker.
>
> v2: rework to use load_var/store_var with deref chains
> v3: create new "burner" reg for temporarily holding the (potentially
> writemask'd) dest after each instruction; add load_var to initialize
> temporary dest in case not all components are overwritten
>
> Signed-off-by: Rob Clark 
> ---
>  src/gallium/auxiliary/nir/tgsi_to_nir.c | 159 
> +---
>  1 file changed, 144 insertions(+), 15 deletions(-)
>
> diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c 
> b/src/gallium/auxiliary/nir/tgsi_to_nir.c
> index fcccdad..c3332cc 100644
> --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
> +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
> @@ -44,6 +44,7 @@
>  struct ttn_reg_info {
> /** nir register containing this TGSI index. */
> nir_register *reg;
> +   nir_variable *var;
> /** Offset (in vec4s) from the start of var for this TGSI index. */
> int offset;
>  };
> @@ -120,21 +121,32 @@ ttn_emit_declaration(struct ttn_compile *c)
> unsigned i;
>
> if (file == TGSI_FILE_TEMPORARY) {
> -  nir_register *reg;
> -  if (c->scan->indirect_files & (1 << file)) {
> - reg = nir_local_reg_create(b->impl);
> - reg->num_components = 4;
> - reg->num_array_elems = array_size;
> +  if (decl->Declaration.Array) {
> + /* for arrays, we create variables instead of registers: */
> + nir_variable *var = rzalloc(b->shader, nir_variable);
> +
> + var->type = glsl_array_type(glsl_vec4_type(), array_size);
> + var->data.mode = nir_var_global;
> + var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID);
> +
> + exec_list_push_tail(&b->shader->globals, &var->node);
>
>   for (i = 0; i < array_size; i++) {
> -c->temp_regs[decl->Range.First + i].reg = reg;
> +/* point all the matching slots to the same var,
> + * with appropriate offset set, mostly just so
> + * we know what to do when tgsi does a non-indirect
> + * access
> + */
> +c->temp_regs[decl->Range.First + i].reg = NULL;
> +c->temp_regs[decl->Range.First + i].var = var;
>  c->temp_regs[decl->Range.First + i].offset = i;
>   }
>} else {
>   for (i = 0; i < array_size; i++) {
> -reg = nir_local_reg_create(b->impl);
> +nir_register *reg = nir_local_reg_create(b->impl);
>  reg->num_components = 4;
>  c->temp_regs[decl->Range.First + i].reg = reg;
> +c->temp_regs[decl->Range.First + i].var = NULL;
>  c->temp_regs[decl->Range.First + i].offset = 0;
>   }
>}
> @@ -245,6 +257,32 @@ ttn_emit_immediate(struct ttn_compile *c)
>  static nir_src *
>  ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register 
> *indirect);
>
> +/* generate either a constant or indirect deref chain for accessing an
> + * array variable.
> + */
> +static nir_deref_var *
> +ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset,
> +struct tgsi_ind_register *indirect)
> +{
> +   nir_builder *b = &c->build;
> +   nir_deref_var *deref = nir_deref_var_create(b->shader, var);
> +   nir_deref_array *arr = nir_deref_array_create(b->shader);
> +
> +   arr->base_offset = offset;
> +   arr->deref.type = glsl_get_array_element(var->type);
> +
> +   if (indirect) {
> +  arr->deref_array_type = nir_deref_array_type_indirect;
> +  arr->indirect = nir_src_for_reg(c->addr_reg);
> +   } else {
> +  arr->deref_array_type = nir_deref_array_type_direct;
> +   }
> +
> +   deref->deref.child = &arr->deref;
> +
> +   return deref;
> +}
> +
>  static nir_src
>  ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned 
> index,
> struct tgsi_ind_register *indirect)
> @@ -256,10 +294,25 @@ ttn_src_for_file_and_index(struct ttn_compile *c, 
> unsigned file, unsigned index,
>
> switch (file) {
> case TGSI_FILE_TE

Re: [Mesa-dev] [PATCH] i965: Add XRGB8888 format to intel_screen_make_configs

2015-04-08 Thread Chih-Wei Huang

Please consider i915 as well.
Should it be added to .../dri/i915/intel_screen.c?

diff --git a/src/mesa/drivers/dri/i915/intel_screen.c
b/src/mesa/drivers/dri/i915/intel_screen.c
index 34efb29..5cd2a9b 100644
--- a/src/mesa/drivers/dri/i915/intel_screen.c
+++ b/src/mesa/drivers/dri/i915/intel_screen.c
@@ -1061,7 +1076,8 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
 {
static const mesa_format formats[] = {
   MESA_FORMAT_B5G6R5_UNORM,
-  MESA_FORMAT_B8G8R8A8_UNORM
+  MESA_FORMAT_B8G8R8A8_UNORM,
+  MESA_FORMAT_B8G8R8X8_UNORM,
};

/* GLX_SWAP_COPY_OML is not supported due to page flipping. */

2015-03-25 19:36 GMT+08:00 Boyan Ding :
> Some applications, such as the drm backend of weston, use an XRGB8888
> config by default. i965 doesn't provide this format, but before commit
> 65c8965d, the drm platform of EGL treated ARGB8888 as XRGB8888. Now that
> commit 65c8965d makes EGL recognize formats correctly, weston won't start
> because it can't find XRGB8888. Add the XRGB8888 format to i965 just as
> other drivers do.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89689
> Signed-off-by: Boyan Ding 
> ---
>  src/mesa/drivers/dri/i965/intel_screen.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
> b/src/mesa/drivers/dri/i965/intel_screen.c
> index 3640b67..2b82c33 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -1126,7 +1126,8 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
>  {
> static const mesa_format formats[] = {
>MESA_FORMAT_B5G6R5_UNORM,
> -  MESA_FORMAT_B8G8R8A8_UNORM
> +  MESA_FORMAT_B8G8R8A8_UNORM,
> +  MESA_FORMAT_B8G8R8X8_UNORM
> };
>
> /* GLX_SWAP_COPY_OML is not supported due to page flipping. */
> --
> 2.3.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev



-- 
Chih-Wei
Android-x86 project
http://www.android-x86.org
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 89963] lp_bld_debug.cpp:100:31: error: no matching function for call to ‘llvm::raw_ostream::raw_ostream()’

https://bugs.freedesktop.org/show_bug.cgi?id=89963

Vinson Lee  changed:

   What|Removed |Added

   Keywords||bisected

--- Comment #1 from Vinson Lee  ---
Build error introduced with llvm-3.7.0svn r234460.

commit 271631a0afecebfb806e8d4d67407c919c4e1c0d
Author: Rafael Espindola 
Date:   Thu Apr 9 02:10:28 2015 +

Add classof implementations to the raw_ostream classes.

More uses to follow in another patch.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234460
91177308-0d34-0410-b5e6-96231b3b80d8

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/12] i965: Create NIR during LinkShader() and ProgramStringNotify().

On Wed, Apr 8, 2015 at 12:53 PM, Ian Romanick  wrote:
> On 04/08/2015 02:25 AM, Martin Peres wrote:
>> On 08/04/15 10:06, Kenneth Graunke wrote:
>>> Previously, we translated into NIR and did all the optimizations and
>>> lowering as part of running fs_visitor.  This meant that we did all of
>>> that work twice for fragment shaders - once for SIMD8, and again for
>>> SIMD16.  We also had to redo it every time we hit a state based
>>> recompile.
>>>
>>> We now generate NIR once at link time.  ARB programs don't have linking,
>>> so we instead generate it at ProgramStringNotify time.
>>>
>>> Mesa's fixed function vertex program handling doesn't bother to inform
>>> the driver about new programs at all (which is rather mean), so we
>>> generate NIR at the last minute, if it hasn't happened already.
>>>
>>> shader-db runs ~9.4% faster on my i7-5600U, with a release build.
>>
>> Nice speed improvement but wouldn't it affect negatively programs using
>> SSO to recombine shaders at run time?
>
> Hm... that's a fair question.  Does NIR do any cross-stage optimization?

Not at the moment.  We probably should since NIR can probably
dead-code things better.
--Jason

>>> Signed-off-by: Kenneth Graunke 
>>> ---
>>>   src/mesa/drivers/dri/i965/Makefile.sources |   1 +
>>>   src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 174
>>> +--
>>>   src/mesa/drivers/dri/i965/brw_nir.c| 213
>>> +
>>>   src/mesa/drivers/dri/i965/brw_nir.h|   6 +
>>>   src/mesa/drivers/dri/i965/brw_program.c|   7 +
>>>   src/mesa/drivers/dri/i965/brw_shader.cpp   |   6 +
>>>   src/mesa/drivers/dri/i965/brw_vec4.cpp |  17 ++-
>>>   src/mesa/main/mtypes.h |   2 +
>>>   src/mesa/program/program.c |   5 +
>>>   9 files changed, 255 insertions(+), 176 deletions(-)
>>>   create mode 100644 src/mesa/drivers/dri/i965/brw_nir.c
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources
>>> b/src/mesa/drivers/dri/i965/Makefile.sources
>>> index 498d5a7..6d4659f 100644
>>> --- a/src/mesa/drivers/dri/i965/Makefile.sources
>>> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
>>> @@ -77,6 +77,7 @@ i965_FILES = \
>>>   brw_misc_state.c \
>>>   brw_multisample_state.h \
>>>   brw_nir.h \
>>> +brw_nir.c \
>>>   brw_nir_analyze_boolean_resolves.c \
>>>   brw_object_purgeable.c \
>>>   brw_packed_float.c \
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>>> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>>> index 034b79a..ccffd5d 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
>>> @@ -28,175 +28,10 @@
>>>   #include "brw_fs.h"
>>>   #include "brw_nir.h"
>>>   -static void
>>> -nir_optimize(nir_shader *nir)
>>> -{
>>> -   bool progress;
>>> -   do {
>>> -  progress = false;
>>> -  nir_lower_vars_to_ssa(nir);
>>> -  nir_validate_shader(nir);
>>> -  nir_lower_alu_to_scalar(nir);
>>> -  nir_validate_shader(nir);
>>> -  progress |= nir_copy_prop(nir);
>>> -  nir_validate_shader(nir);
>>> -  nir_lower_phis_to_scalar(nir);
>>> -  nir_validate_shader(nir);
>>> -  progress |= nir_copy_prop(nir);
>>> -  nir_validate_shader(nir);
>>> -  progress |= nir_opt_dce(nir);
>>> -  nir_validate_shader(nir);
>>> -  progress |= nir_opt_cse(nir);
>>> -  nir_validate_shader(nir);
>>> -  progress |= nir_opt_peephole_select(nir);
>>> -  nir_validate_shader(nir);
>>> -  progress |= nir_opt_algebraic(nir);
>>> -  nir_validate_shader(nir);
>>> -  progress |= nir_opt_constant_folding(nir);
>>> -  nir_validate_shader(nir);
>>> -  progress |= nir_opt_remove_phis(nir);
>>> -  nir_validate_shader(nir);
>>> -   } while (progress);
>>> -}
>>> -
>>> -static bool
>>> -count_nir_instrs_in_block(nir_block *block, void *state)
>>> -{
>>> -   int *count = (int *) state;
>>> -   nir_foreach_instr(block, instr) {
>>> -  *count = *count + 1;
>>> -   }
>>> -   return true;
>>> -}
>>> -
>>> -static int
>>> -count_nir_instrs(nir_shader *nir)
>>> -{
>>> -   int count = 0;
>>> -   nir_foreach_overload(nir, overload) {
>>> -  if (!overload->impl)
>>> - continue;
>>> -  nir_foreach_block(overload->impl, count_nir_instrs_in_block,
>>> &count);
>>> -   }
>>> -   return count;
>>> -}
>>> -
>>>   void
>>>   fs_visitor::emit_nir_code()
>>>   {
>>> -   const nir_shader_compiler_options *options =
>>> -  ctx->Const.ShaderCompilerOptions[stage].NirOptions;
>>> -
>>> -   nir_shader *nir;
>>> -   /* First, lower the GLSL IR or Mesa IR to NIR */
>>> -   if (shader_prog) {
>>> -  nir = glsl_to_nir(&shader->base, options);
>>> -   } else {
>>> -  nir = prog_to_nir(prog, options);
>>> -  nir_convert_to_ssa(nir); /* turn registers into SSA */
>>> -   }
>>> -   nir_validate_shader(nir);
>>> -
>>> -   nir_lower_global_vars_to_local(nir);
>>> -   nir_validate_shader(nir);
>>> -
>>> -   nir_lower_tex_projector(n

[Mesa-dev] [PATCH] gallivm: Fix build since llvm-3.7.0svn r234460.

2015-04-08 Thread Vinson Lee

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89963
Signed-off-by: Vinson Lee 
---
 src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
index 65d2896..b712915 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -97,7 +97,11 @@ private:
uint64_t pos;
 
 public:
+#if HAVE_LLVM >= 0x0307
+   raw_debug_ostream() : raw_ostream(SK_FD), pos(0) { }
+#else
raw_debug_ostream() : pos(0) { }
+#endif
 
void write_impl(const char *Ptr, size_t Size);
 
-- 
2.3.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] gallium/ttn: use single component address register

From: Rob Clark 

The address register only needs to be a vec1, and this helps out the later
opt stages.  For example, the shader (after opt) for
fs-temp-array-mat3-index-col-row-wr looks like this before:

vec1 ssa_408 = imul ssa_155, ssa_1
vec4 ssa_413 = vec4 ssa_408, ssa_412.y, ssa_412.z, ssa_412.w
vec4 ssa_166 = intrinsic load_uniform () () (0, 1)
vec4 ssa_772 = vec4 ssa_166, ssa_166.y, ssa_166.z, ssa_166.z
intrinsic store_var (ssa_772) (arr_5[ssa_413]) ()
vec4 ssa_416 = vec4 ssa_408, ssa_412.y, ssa_412.z, ssa_412.w
vec4 ssa_178 = intrinsic load_uniform () () (1, 1)
vec4 ssa_787 = vec4 ssa_178, ssa_178.y, ssa_178.z, ssa_178.z
intrinsic store_var (ssa_787) (arr_5[1 + ssa_416]) ()
vec4 ssa_190 = intrinsic load_uniform () () (2, 1)
vec4 ssa_802 = vec4 ssa_190, ssa_190.y, ssa_190.z, ssa_190.z
intrinsic store_var (ssa_802) (arr_5[2 + ssa_416]) ()

after:

vec1 ssa_408 = imul ssa_155, ssa_1
vec4 ssa_166 = intrinsic load_uniform () () (0, 1)
vec4 ssa_763 = vec4 ssa_166, ssa_166.y, ssa_166.z, ssa_166.z
intrinsic store_var (ssa_763) (arr_5[ssa_408]) ()
vec4 ssa_178 = intrinsic load_uniform () () (1, 1)
vec4 ssa_778 = vec4 ssa_178, ssa_178.y, ssa_178.z, ssa_178.z
intrinsic store_var (ssa_778) (arr_5[1 + ssa_408]) ()
vec4 ssa_190 = intrinsic load_uniform () () (2, 1)
vec4 ssa_793 = vec4 ssa_190, ssa_190.y, ssa_190.z, ssa_190.z
intrinsic store_var (ssa_793) (arr_5[2 + ssa_408]) ()

i.e. it realizes the indirect is the same for all three store_var's,
which avoids my backend generating duplicate (mov (shl (cov)))
instruction chains.

v2: add assert, and get rid of pointless imov in other indirect paths

Signed-off-by: Rob Clark 
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c 
b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index c3332cc..648ac6f 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -152,7 +152,7 @@ ttn_emit_declaration(struct ttn_compile *c)
   }
} else if (file == TGSI_FILE_ADDRESS) {
   c->addr_reg = nir_local_reg_create(b->impl);
-  c->addr_reg->num_components = 4;
+  c->addr_reg->num_components = 1;
} else if (file == TGSI_FILE_SAMPLER) {
   /* Nothing to record for samplers. */
} else {
@@ -350,12 +350,8 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned 
file, unsigned index,
   load->const_index[0] = index;
   load->const_index[1] = 1;
   if (indirect) {
- nir_alu_src indirect_address;
- memset(&indirect_address, 0, sizeof(indirect_address));
- indirect_address.src = nir_src_for_reg(c->addr_reg);
- for (int i = 0; i < 4; i++)
-indirect_address.swizzle[i] = indirect->Swizzle;
- load->src[0] = nir_src_for_ssa(nir_imov_alu(b, indirect_address, 1));
+ assert(indirect->Swizzle == TGSI_SWIZZLE_X);
+ load->src[0] = nir_src_for_reg(c->addr_reg);
   }
   nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
   nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
@@ -500,6 +496,8 @@ ttn_get_src(struct ttn_compile *c, struct 
tgsi_full_src_register *tgsi_fsrc)
tgsi_src->Index,
(tgsi_src->Indirect ?
 &tgsi_fsrc->Indirect : NULL));
+  if (tgsi_src->File == TGSI_FILE_ADDRESS)
+ assert(tgsi_src->SwizzleX == TGSI_SWIZZLE_X);
}
 
src.swizzle[0] = tgsi_src->SwizzleX;
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] gallium/ttn: add support for temp arrays

From: Rob Clark 

Since the rest of NIR really would rather have these as variables rather
than registers, create a nir_variable per array.  But rather than
completely re-arrange ttn to be variable based rather than register
based, keep the registers.  In the cases where there is a matching var
for the reg, ttn_emit_instruction will append the appropriate intrinsic
to get things back from the shadow reg into the variable.

NOTE: this doesn't quite handle TEMP[ADDR[]] when the DCL doesn't give
an array id.  But those just kinda suck, and should really go away.
AFAICT we don't get those from glsl.  Might be an issue for some other
state tracker.

v2: rework to use load_var/store_var with deref chains
v3: create new "burner" reg for temporarily holding the (potentially
writemask'd) dest after each instruction; add load_var to initialize
temporary dest in case not all components are overwritten

Signed-off-by: Rob Clark 
---
 src/gallium/auxiliary/nir/tgsi_to_nir.c | 159 +---
 1 file changed, 144 insertions(+), 15 deletions(-)

diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c 
b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index fcccdad..c3332cc 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -44,6 +44,7 @@
 struct ttn_reg_info {
/** nir register containing this TGSI index. */
nir_register *reg;
+   nir_variable *var;
/** Offset (in vec4s) from the start of var for this TGSI index. */
int offset;
 };
@@ -120,21 +121,32 @@ ttn_emit_declaration(struct ttn_compile *c)
unsigned i;
 
if (file == TGSI_FILE_TEMPORARY) {
-  nir_register *reg;
-  if (c->scan->indirect_files & (1 << file)) {
- reg = nir_local_reg_create(b->impl);
- reg->num_components = 4;
- reg->num_array_elems = array_size;
+  if (decl->Declaration.Array) {
+ /* for arrays, we create variables instead of registers: */
+ nir_variable *var = rzalloc(b->shader, nir_variable);
+
+ var->type = glsl_array_type(glsl_vec4_type(), array_size);
+ var->data.mode = nir_var_global;
+ var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID);
+
+ exec_list_push_tail(&b->shader->globals, &var->node);
 
  for (i = 0; i < array_size; i++) {
-c->temp_regs[decl->Range.First + i].reg = reg;
+/* point all the matching slots to the same var,
+ * with appropriate offset set, mostly just so
+ * we know what to do when tgsi does a non-indirect
+ * access
+ */
+c->temp_regs[decl->Range.First + i].reg = NULL;
+c->temp_regs[decl->Range.First + i].var = var;
 c->temp_regs[decl->Range.First + i].offset = i;
  }
   } else {
  for (i = 0; i < array_size; i++) {
-reg = nir_local_reg_create(b->impl);
+nir_register *reg = nir_local_reg_create(b->impl);
 reg->num_components = 4;
 c->temp_regs[decl->Range.First + i].reg = reg;
+c->temp_regs[decl->Range.First + i].var = NULL;
 c->temp_regs[decl->Range.First + i].offset = 0;
  }
   }
@@ -245,6 +257,32 @@ ttn_emit_immediate(struct ttn_compile *c)
 static nir_src *
 ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register 
*indirect);
 
+/* generate either a constant or indirect deref chain for accessing an
+ * array variable.
+ */
+static nir_deref_var *
+ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset,
+struct tgsi_ind_register *indirect)
+{
+   nir_builder *b = &c->build;
+   nir_deref_var *deref = nir_deref_var_create(b->shader, var);
+   nir_deref_array *arr = nir_deref_array_create(b->shader);
+
+   arr->base_offset = offset;
+   arr->deref.type = glsl_get_array_element(var->type);
+
+   if (indirect) {
+  arr->deref_array_type = nir_deref_array_type_indirect;
+  arr->indirect = nir_src_for_reg(c->addr_reg);
+   } else {
+  arr->deref_array_type = nir_deref_array_type_direct;
+   }
+
+   deref->deref.child = &arr->deref;
+
+   return deref;
+}
+
 static nir_src
 ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned 
index,
struct tgsi_ind_register *indirect)
@@ -256,10 +294,25 @@ ttn_src_for_file_and_index(struct ttn_compile *c, 
unsigned file, unsigned index,
 
switch (file) {
case TGSI_FILE_TEMPORARY:
-  src.reg.reg = c->temp_regs[index].reg;
-  src.reg.base_offset = c->temp_regs[index].offset;
-  if (indirect)
- src.reg.indirect = ttn_src_for_indirect(c, indirect);
+  if (c->temp_regs[index].var) {
+ unsigned offset = c->temp_regs[index].offset;
+ nir_variable *var = c->temp_regs[index].var;
+ nir_intrinsic_instr *load;
+
+ load = nir_intrinsic_instr_create(b->shader,
+   nir_intrinsic_load_var);
+ load-

Re: [Mesa-dev] [PATCH] glsl: check for forced_language_version in is_version()

Reviewed-by: Ian Romanick 

On 04/07/2015 09:33 AM, Brian Paul wrote:
> Ping.
> 
> On 04/01/2015 02:38 PM, Brian Paul wrote:
>> This is a follow-on fix from the earlier "glsl: allow ForceGLSLVersion
>> to override #version directives" change.  Since we're not changing
>> the language_version field, we have to check forced_language_version
>> here.
>> ---
>>   src/glsl/glsl_parser_extras.h | 4 +++-
>>   1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/glsl/glsl_parser_extras.h
>> b/src/glsl/glsl_parser_extras.h
>> index 1f5478b..dae7864 100644
>> --- a/src/glsl/glsl_parser_extras.h
>> +++ b/src/glsl/glsl_parser_extras.h
>> @@ -105,8 +105,10 @@ struct _mesa_glsl_parse_state {
>>  {
>> unsigned required_version = this->es_shader ?
>>required_glsl_es_version : required_glsl_version;
>> +  unsigned this_version = this->forced_language_version
>> + ? this->forced_language_version : this->language_version;
>> return required_version != 0
>> - && this->language_version >= required_version;
>> + && this_version >= required_version;
>>  }
>>
>>  bool check_version(unsigned required_glsl_version,
>>
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] Problem with LLVM on Windows with MSVC

2015-04-08 Thread Shervin Sharifi

Hi,

 I'm trying to build mesa on windows (MSVC) with gles support and with llvm.
 Here are the keys I'm using:
scons.py gles=yes llvm=yes platform=windows libgl-gdi

 I'm getting a bunch of errors like this:

LLVMCore.lib(ValueSymbolTable.obj) : error LNK2038: mismatch detected for
'RuntimeLibrary': value 'MDd_DynamicDebug' doesn't match value
'MTd_StaticDebug' in mesa.lib(uniform_query.obj)

I understand that this is due to a mismatch between the runtime libraries,
but I don't know how to fix it.
Should I change the CRT in llvm or in mesa? How should I do that?

 Thanks,
 Shervin
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/5] Improve CSE

This series is a blending of some work that I have done and some work
that Matt did.  This series and some patches that didn't pan out (at
least not after some other recent changes) are available on the cse-neg
branch of my fdo tree.

The overall results for the series are:

GM45:
total instructions in shared programs: 4063165 -> 4059575 (-0.09%)
instructions in affected programs: 320182 -> 316592 (-1.12%)
helped:1436
HURT:  33

GM45 NIR:
total instructions in shared programs: 4082044 -> 4078671 (-0.08%)
instructions in affected programs: 291225 -> 287852 (-1.16%)
helped:1360
HURT:  57

Iron Lake:
total instructions in shared programs: 5480334 -> 5476586 (-0.07%)
instructions in affected programs: 400843 -> 397095 (-0.94%)
helped:1602
HURT:  60

Iron Lake NIR:
total instructions in shared programs: 5678776 -> 5675486 (-0.06%)
instructions in affected programs: 337985 -> 334695 (-0.97%)
helped:1461
HURT:  101

Sandy Bridge:
total instructions in shared programs: 7310035 -> 7305478 (-0.06%)
instructions in affected programs: 370635 -> 366078 (-1.23%)
helped:2791
HURT:  83
LOST:  8

Sandy Bridge NIR:
total instructions in shared programs: 7329995 -> 7324864 (-0.07%)
instructions in affected programs: 819282 -> 814151 (-0.63%)
helped:3704
HURT:  906
GAINED:4

Ivy Bridge:
total instructions in shared programs: 6766579 -> 6762664 (-0.06%)
instructions in affected programs: 351828 -> 347913 (-1.11%)
helped:2738
HURT:  79
GAINED:1

Ivy Bridge NIR:
total instructions in shared programs: 6769314 -> 6763686 (-0.08%)
instructions in affected programs: 641188 -> 635560 (-0.88%)
helped:3619
HURT:  219
GAINED:2

Haswell:
total instructions in shared programs: 6226294 -> 6222448 (-0.06%)
instructions in affected programs: 338353 -> 334507 (-1.14%)
helped:2734
HURT:  84
GAINED:1

Haswell NIR:
total instructions in shared programs: 6183693 -> 6179653 (-0.07%)
instructions in affected programs: 454070 -> 450030 (-0.89%)
helped:3114
HURT:  215
GAINED:2

Broadwell:
total instructions in shared programs: 7285895 -> 7284103 (-0.02%)
instructions in affected programs: 177765 -> 175973 (-1.01%)
helped:960
HURT:  101

Broadwell NIR:
total instructions in shared programs: 7501711 -> 7499619 (-0.03%)
instructions in affected programs: 705285 -> 703193 (-0.30%)
helped:2244
HURT:  398
GAINED:2


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/5] glsl: Propagate negates through multiplication chains.