Re: [Mesa-dev] [PATCH 02/10] nir: Properly clean up CF nodes when we remove them

2015-02-09 Thread Jason Ekstrand
On Thu, Feb 5, 2015 at 3:27 PM, Connor Abbott  wrote:

> While we're cleaning this up, we should go a little farther - we
> should also see if anything is using the SSA result of any of these
> instructions, and if so make it point to an undef_instr instead to
> avoid dangling pointers. This doesn't come up yet but it will when we
> start cleaning up constant control flow.
>

I'm about to send a v2 and I'm leaving this out for now.  First, because
that's a bit more invasive than I want to do/test right now and second,
because I'm not convinced it's needed.  For control flow, what we really
need to do is assert that there are no phi nodes.  As long as that and the
dominance property both hold, we won't have any uses of deleted SSA values.


> On Thu, Feb 5, 2015 at 5:28 PM, Jason Ekstrand 
> wrote:
> > Previously, if you remved a CF node that still had instructions in it,
> none
> > of the use/def information from those instructions would get cleaned up.
> > Also, we weren't removing if statements from the if_uses of the
> > corresponding register or SSA def.  This commit fixes both of these
> > problems
> > ---
> >  src/glsl/nir/nir.c | 54
> ++
> >  1 file changed, 54 insertions(+)
> >
> > diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
> > index 9a88bd3..8ea7bb5 100644
> > --- a/src/glsl/nir/nir.c
> > +++ b/src/glsl/nir/nir.c
> > @@ -1148,6 +1148,58 @@ stitch_blocks(nir_block *before, nir_block *after)
> > exec_node_remove(&after->cf_node.node);
> >  }
> >
> > +static void
> > +remove_defs_uses(nir_instr *instr);
> > +
> > +static void
> > +cleanup_cf_node(nir_cf_node *node)
> > +{
> > +   switch (node->type) {
> > +   case nir_cf_node_block: {
> > +  nir_block *block = nir_cf_node_as_block(node);
> > +  /* We need to walk the instructions and clean up defs/uses */
> > +  nir_foreach_instr(block, instr)
> > + remove_defs_uses(instr);
> > +  break;
> > +   }
> > +
> > +   case nir_cf_node_if: {
> > +  nir_if *if_stmt = nir_cf_node_as_if(node);
> > +  foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list)
> > + cleanup_cf_node(child);
> > +  foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list)
> > + cleanup_cf_node(child);
> > +
> > +  struct set *if_uses;
> > +  if (if_stmt->condition.is_ssa) {
> > + if_uses = if_stmt->condition.ssa->if_uses;
> > +  } else {
> > + if_uses = if_stmt->condition.reg.reg->if_uses;
> > +  }
> > +
> > +  struct set_entry *entry = _mesa_set_search(if_uses, if_stmt);
> > +  assert(entry);
> > +  _mesa_set_remove(if_uses, entry);
> > +  break;
> > +   }
> > +
> > +   case nir_cf_node_loop: {
> > +  nir_loop *loop = nir_cf_node_as_loop(node);
> > +  foreach_list_typed(nir_cf_node, child, node, &loop->body)
> > + cleanup_cf_node(child);
> > +  break;
> > +   }
> > +   case nir_cf_node_function: {
> > +  nir_function_impl *impl = nir_cf_node_as_function(node);
> > +  foreach_list_typed(nir_cf_node, child, node, &impl->body)
> > + cleanup_cf_node(child);
> > +  break;
> > +   }
> > +   default:
> > +  unreachable("Invalid CF node type");
> > +   }
> > +}
> > +
> >  void
> >  nir_cf_node_remove(nir_cf_node *node)
> >  {
> > @@ -1175,6 +1227,8 @@ nir_cf_node_remove(nir_cf_node *node)
> >exec_node_remove(&node->node);
> >stitch_blocks(before_block, after_block);
> > }
> > +
> > +   cleanup_cf_node(node);
> >  }
> >
> >  static bool
> > --
> > 2.2.2
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 03/12] nir/from_ssa: Use the nir_block_dominance function instead of our own

2015-02-09 Thread Jason Ekstrand
Reviewed-by: Connor Abbott 
---
 src/glsl/nir/nir_from_ssa.c | 9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c
index 2e7add3..3625237 100644
--- a/src/glsl/nir/nir_from_ssa.c
+++ b/src/glsl/nir/nir_from_ssa.c
@@ -54,13 +54,8 @@ ssa_def_dominates(nir_ssa_def *a, nir_ssa_def *b)
} else if (a->parent_instr->block == b->parent_instr->block) {
   return a->live_index <= b->live_index;
} else {
-  nir_block *block = b->parent_instr->block;
-  while (block->imm_dom != NULL) {
- if (block->imm_dom == a->parent_instr->block)
-return true;
- block = block->imm_dom;
-  }
-  return false;
+  return nir_block_dominates(a->parent_instr->block,
+ b->parent_instr->block);
}
 }
 
-- 
2.2.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 09/12] nir: Make nir_[cf_node/instr]_[prev/next] return null if at the end

2015-02-09 Thread Jason Ekstrand
---
 src/glsl/nir/nir.h | 28 ++--
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 5d84343..f4056f4 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -416,15 +416,23 @@ typedef struct {
 } nir_instr;
 
 static inline nir_instr *
-nir_instr_next(const nir_instr *instr)
+nir_instr_next(nir_instr *instr)
 {
-   return exec_node_data(nir_instr, (instr)->node.next, node);
+   struct exec_node *next = exec_node_get_next(&instr->node);
+   if (exec_node_is_tail_sentinel(next))
+  return NULL;
+   else
+  return exec_node_data(nir_instr, next, node);
 }
 
 static inline nir_instr *
-nir_instr_prev(const nir_instr *instr)
+nir_instr_prev(nir_instr *instr)
 {
-   return exec_node_data(nir_instr, (instr)->node.prev, node);
+   struct exec_node *prev = exec_node_get_prev(&instr->node);
+   if (exec_node_is_head_sentinel(prev))
+  return NULL;
+   else
+  return exec_node_data(nir_instr, prev, node);
 }
 
 typedef struct {
@@ -1272,13 +1280,21 @@ typedef struct {
 static inline nir_cf_node *
 nir_cf_node_next(nir_cf_node *node)
 {
-   return exec_node_data(nir_cf_node, exec_node_get_next(&node->node), node);
+   struct exec_node *next = exec_node_get_next(&node->node);
+   if (exec_node_is_tail_sentinel(next))
+  return NULL;
+   else
+  return exec_node_data(nir_cf_node, next, node);
 }
 
 static inline nir_cf_node *
 nir_cf_node_prev(nir_cf_node *node)
 {
-   return exec_node_data(nir_cf_node, exec_node_get_prev(&node->node), node);
+   struct exec_node *prev = exec_node_get_prev(&node->node);
+   if (exec_node_is_head_sentinel(prev))
+  return NULL;
+   else
+  return exec_node_data(nir_cf_node, prev, node);
 }
 
 static inline bool
-- 
2.2.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 07/12] nir/validate: Validate SSA defs the same way we do for registers

2015-02-09 Thread Jason Ekstrand
Reviewed-by: Connor Abbott 
---
 src/glsl/nir/nir_validate.c | 87 -
 1 file changed, 79 insertions(+), 8 deletions(-)

diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
index 13010ae..a3fe9d6 100644
--- a/src/glsl/nir/nir_validate.c
+++ b/src/glsl/nir/nir_validate.c
@@ -51,6 +51,15 @@ typedef struct {
 } reg_validate_state;
 
 typedef struct {
+   /*
+* equivalent to the uses in nir_ssa_def, but built up by the validator.
+* At the end, we verify that the sets have the same entries.
+*/
+   struct set *uses, *if_uses;
+   nir_function_impl *where_defined;
+} ssa_def_validate_state;
+
+typedef struct {
/* map of register -> validation state (struct above) */
struct hash_table *regs;
 
@@ -135,19 +144,21 @@ validate_ssa_src(nir_ssa_def *def, validate_state *state)
 
assert(entry);
 
-   assert((nir_function_impl *) entry->data == state->impl &&
-  "using an SSA value defined in a different function");
+   ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data;
 
-   struct set_entry *entry2;
+   assert(def_state->where_defined == state->impl &&
+  "using an SSA value defined in a different function");
 
if (state->instr) {
-  entry2 = _mesa_set_search(def->uses, state->instr);
+  _mesa_set_add(def_state->uses, state->instr);
+
+  assert(_mesa_set_search(def->uses, state->instr));
} else {
   assert(state->if_stmt);
-  entry2 = _mesa_set_search(def->if_uses, state->if_stmt);
-   }
+  _mesa_set_add(def_state->if_uses, state->if_stmt);
 
-   assert(entry2 && "SSA use missing");
+  assert(_mesa_set_search(def->if_uses, state->if_stmt));
+   }
 
/* TODO validate that the use is dominated by the definition */
 }
@@ -226,7 +237,15 @@ validate_ssa_def(nir_ssa_def *def, validate_state *state)
BITSET_SET(state->ssa_defs_found, def->index);
 
assert(def->num_components <= 4);
-   _mesa_hash_table_insert(state->ssa_defs, def, state->impl);
+
+   ssa_def_validate_state *def_state = ralloc(state->ssa_defs,
+  ssa_def_validate_state);
+   def_state->where_defined = state->impl;
+   def_state->uses = _mesa_set_create(def_state, _mesa_hash_pointer,
+  _mesa_key_pointer_equal);
+   def_state->if_uses = _mesa_set_create(def_state, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+   _mesa_hash_table_insert(state->ssa_defs, def, def_state);
 }
 
 static void
@@ -759,6 +778,56 @@ validate_var_decl(nir_variable *var, bool is_global, 
validate_state *state)
}
 }
 
+static bool
+postvalidate_ssa_def(nir_ssa_def *def, void *void_state)
+{
+   validate_state *state = void_state;
+
+   struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def);
+   ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data;
+
+   if (def_state->uses->entries != def->uses->entries) {
+  printf("extra entries in SSA def uses:\n");
+  struct set_entry *entry;
+  set_foreach(def->uses, entry) {
+ struct set_entry *entry2 =
+_mesa_set_search(def_state->uses, entry->key);
+
+ if (entry2 == NULL) {
+printf("%p\n", entry->key);
+ }
+  }
+
+  abort();
+   }
+
+   if (def_state->if_uses->entries != def->if_uses->entries) {
+  printf("extra entries in SSA def uses:\n");
+  struct set_entry *entry;
+  set_foreach(def->if_uses, entry) {
+ struct set_entry *entry2 =
+_mesa_set_search(def_state->if_uses, entry->key);
+
+ if (entry2 == NULL) {
+printf("%p\n", entry->key);
+ }
+  }
+
+  abort();
+   }
+
+   return true;
+}
+
+static bool
+postvalidate_ssa_defs_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, instr)
+  nir_foreach_ssa_def(instr, postvalidate_ssa_def, state);
+
+   return true;
+}
+
 static void
 validate_function_impl(nir_function_impl *impl, validate_state *state)
 {
@@ -809,6 +878,8 @@ validate_function_impl(nir_function_impl *impl, 
validate_state *state)
foreach_list_typed(nir_register, reg, node, &impl->registers) {
   postvalidate_reg_decl(reg, state);
}
+
+   nir_foreach_block(impl, postvalidate_ssa_defs_block, state);
 }
 
 static void
-- 
2.2.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 10/12] nir: Add an instruction index

2015-02-09 Thread Jason Ekstrand
This index, unlike the others, has to be manually updated by passes that
want to use it.

Reviewed-by: Connor Abbott 
---
 src/glsl/nir/nir.c | 20 
 src/glsl/nir/nir.h |  4 
 2 files changed, 24 insertions(+)

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 8ea7bb5..6758a72 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -352,6 +352,7 @@ instr_init(nir_instr *instr, nir_instr_type type)
 {
instr->type = type;
instr->block = NULL;
+   instr->index = UINT_MAX;
exec_node_init(&instr->node);
 }
 
@@ -2081,3 +2082,22 @@ nir_index_ssa_defs(nir_function_impl *impl)
nir_foreach_block(impl, index_ssa_block, &index);
impl->ssa_alloc = index;
 }
+
+static bool
+index_instrs_block(nir_block *block, void *state)
+{
+   unsigned *index = (unsigned *)state;
+
+   nir_foreach_instr(block, instr)
+  instr->index = (*index)++;
+
+   return true;
+}
+
+unsigned
+nir_index_instrs(nir_function_impl *impl)
+{
+   unsigned index = 0;
+   nir_foreach_block(impl, index_instrs_block, &index);
+   return index;
+}
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index f4056f4..90a7001 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -411,6 +411,9 @@ typedef struct {
nir_instr_type type;
struct nir_block *block;
 
+   /** generic instruction index. */
+   unsigned index;
+
/* flag for dead code elimination (see nir_opt_dce.c) */
bool live;
 } nir_instr;
@@ -1509,6 +1512,7 @@ nir_if *nir_block_get_following_if(nir_block *block);
 void nir_index_local_regs(nir_function_impl *impl);
 void nir_index_global_regs(nir_shader *shader);
 void nir_index_ssa_defs(nir_function_impl *impl);
+unsigned nir_index_instrs(nir_function_impl *impl);
 
 void nir_index_blocks(nir_function_impl *impl);
 
-- 
2.2.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 11/12] nir: Add a global code motion (GCM) pass

2015-02-09 Thread Jason Ekstrand
v2 Jason Ekstrand :
 - Use nir_dominance_lca for computing least common anscestors
 - Use the block index for comparing dominance tree depths
 - Pin things that do partial derivatives
---
 src/glsl/Makefile.sources  |   1 +
 src/glsl/nir/nir.h |   2 +
 src/glsl/nir/nir_opt_gcm.c | 501 +
 3 files changed, 504 insertions(+)
 create mode 100644 src/glsl/nir/nir_opt_gcm.c

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index a580b6e..69cb2e6 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -43,6 +43,7 @@ NIR_FILES = \
nir/nir_opt_copy_propagate.c \
nir/nir_opt_cse.c \
nir/nir_opt_dce.c \
+   nir/nir_opt_gcm.c \
nir/nir_opt_global_to_local.c \
nir/nir_opt_peephole_select.c \
nir/nir_opt_remove_phis.c \
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 90a7001..55fb43d 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1589,6 +1589,8 @@ bool nir_opt_cse(nir_shader *shader);
 bool nir_opt_dce_impl(nir_function_impl *impl);
 bool nir_opt_dce(nir_shader *shader);
 
+void nir_opt_gcm(nir_shader *shader);
+
 bool nir_opt_peephole_select(nir_shader *shader);
 bool nir_opt_peephole_ffma(nir_shader *shader);
 
diff --git a/src/glsl/nir/nir_opt_gcm.c b/src/glsl/nir/nir_opt_gcm.c
new file mode 100644
index 000..d48518b
--- /dev/null
+++ b/src/glsl/nir/nir_opt_gcm.c
@@ -0,0 +1,501 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Jason Ekstrand (ja...@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements Global Code Motion.  A description of GCM can be found in
+ * "Global Code Motion; Global Value Numbering" by Cliff Click.
+ * Unfortunately, the algorithm presented in the paper is broken in a
+ * number of ways.  The algorithm used here differs substantially from the
+ * one in the paper but it is, in my opinion, much easier to read and
+ * verify correcness.
+ */
+
+struct gcm_block_info {
+   /* Number of loops this block is inside */
+   unsigned loop_depth;
+
+   /* The last instruction inserted into this block.  This is used as we
+* traverse the instructions and insert them back into the program to
+* put them in the right order.
+*/
+   nir_instr *last_instr;
+};
+
+struct gcm_state {
+   nir_function_impl *impl;
+   nir_instr *instr;
+
+   /* Marks all instructions that have been visited by the curren pass */
+   BITSET_WORD *visited;
+
+   /* Marks instructions that are "pinned", i.e. cannot be moved from their
+* basic block by code motion */
+   BITSET_WORD *pinned;
+
+   /* The list of non-pinned instructions.  As we do the late scheduling,
+* we pull non-pinned instructions out of their blocks and place them in
+* this list.  This saves us from having linked-list problems when we go
+* to put instructions back in their blocks.
+*/
+   struct exec_list instrs;
+
+   struct gcm_block_info *blocks;
+};
+
+/* Recursively walks the CFG and builds the block_info structure */
+static void
+gcm_build_block_info(struct exec_list *cf_list, struct gcm_state *state,
+ unsigned loop_depth)
+{
+   foreach_list_typed(nir_cf_node, node, node, cf_list) {
+  switch (node->type) {
+  case nir_cf_node_block: {
+ nir_block *block = nir_cf_node_as_block(node);
+ state->blocks[block->index].loop_depth = loop_depth;
+ break;
+  }
+  case nir_cf_node_if: {
+ nir_if *if_stmt = nir_cf_node_as_if(node);
+ gcm_build_block_info(&if_stmt->then_list, state, loop_depth);
+ gcm_build_block_info(&if_stmt->else_list, state, loop_depth);
+ break;
+  }
+  case nir_cf_node_loop: {
+ nir_loop *loop = nir_cf_node_as_loop(node);
+ gcm_build_block_info(&loop->body, state, loop_depth + 1);
+

[Mesa-dev] [PATCH v2 06/12] nir/validate: Validate if_uses on registers

2015-02-09 Thread Jason Ekstrand
Reviewed-by: Connor Abbott 
---
 src/glsl/nir/nir_validate.c | 62 -
 1 file changed, 44 insertions(+), 18 deletions(-)

diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
index a34a017..13010ae 100644
--- a/src/glsl/nir/nir_validate.c
+++ b/src/glsl/nir/nir_validate.c
@@ -46,7 +46,7 @@ typedef struct {
 * equivalent to the uses and defs in nir_register, but built up by the
 * validator. At the end, we verify that the sets have the same entries.
 */
-   struct set *uses, *defs;
+   struct set *uses, *if_uses, *defs;
nir_function_impl *where_defined; /* NULL for global registers */
 } reg_validate_state;
 
@@ -92,16 +92,22 @@ validate_reg_src(nir_reg_src *src, validate_state *state)
 {
assert(src->reg != NULL);
 
-   struct set_entry *entry = _mesa_set_search(src->reg->uses, state->instr);
-   assert(entry && "use not in nir_register.uses");
+   struct hash_entry *entry;
+   entry = _mesa_hash_table_search(state->regs, src->reg);
+   assert(entry);
 
-   struct hash_entry *entry2;
-   entry2 = _mesa_hash_table_search(state->regs, src->reg);
+   reg_validate_state *reg_state = (reg_validate_state *) entry->data;
 
-   assert(entry2);
+   if (state->instr) {
+  _mesa_set_add(reg_state->uses, state->instr);
 
-   reg_validate_state *reg_state = (reg_validate_state *) entry2->data;
-   _mesa_set_add(reg_state->uses, state->instr);
+  assert(_mesa_set_search(src->reg->uses, state->instr));
+   } else {
+  assert(state->if_stmt);
+  _mesa_set_add(reg_state->if_uses, state->if_stmt);
+
+  assert(_mesa_set_search(src->reg->if_uses, state->if_stmt));
+   }
 
if (!src->reg->is_global) {
   assert(reg_state->where_defined == state->impl &&
@@ -134,7 +140,12 @@ validate_ssa_src(nir_ssa_def *def, validate_state *state)
 
struct set_entry *entry2;
 
-   entry2 = _mesa_set_search(def->uses, state->instr);
+   if (state->instr) {
+  entry2 = _mesa_set_search(def->uses, state->instr);
+   } else {
+  assert(state->if_stmt);
+  entry2 = _mesa_set_search(def->if_uses, state->if_stmt);
+   }
 
assert(entry2 && "SSA use missing");
 
@@ -484,6 +495,8 @@ validate_instr(nir_instr *instr, validate_state *state)
   assert(!"Invalid ALU instruction type");
   break;
}
+
+   state->instr = NULL;
 }
 
 static void
@@ -501,6 +514,7 @@ validate_phi_src(nir_phi_instr *instr, nir_block *pred, 
validate_state *state)
 instr->dest.ssa.num_components);
 
  validate_src(&src->src, state);
+ state->instr = NULL;
  return;
   }
}
@@ -562,6 +576,8 @@ validate_block(nir_block *block, validate_state *state)
 static void
 validate_if(nir_if *if_stmt, validate_state *state)
 {
+   state->if_stmt = if_stmt;
+
assert(!exec_node_is_head_sentinel(if_stmt->cf_node.node.prev));
nir_cf_node *prev_node = nir_cf_node_prev(&if_stmt->cf_node);
assert(prev_node->type == nir_cf_node_block);
@@ -576,15 +592,7 @@ validate_if(nir_if *if_stmt, validate_state *state)
nir_cf_node *next_node = nir_cf_node_next(&if_stmt->cf_node);
assert(next_node->type == nir_cf_node_block);
 
-   if (!if_stmt->condition.is_ssa) {
-  nir_register *reg = if_stmt->condition.reg.reg;
-  struct set_entry *entry =  _mesa_set_search(reg->if_uses, if_stmt);
-  assert(entry);
-   } else {
-  nir_ssa_def *def = if_stmt->condition.ssa;
-  struct set_entry *entry = _mesa_set_search(def->if_uses, if_stmt);
-  assert(entry);
-   }
+   validate_src(&if_stmt->condition, state);
 
assert(!exec_list_is_empty(&if_stmt->then_list));
assert(!exec_list_is_empty(&if_stmt->else_list));
@@ -603,6 +611,7 @@ validate_if(nir_if *if_stmt, validate_state *state)
}
 
state->parent_node = old_parent;
+   state->if_stmt = NULL;
 }
 
 static void
@@ -672,6 +681,8 @@ prevalidate_reg_decl(nir_register *reg, bool is_global, 
validate_state *state)
reg_validate_state *reg_state = ralloc(state->regs, reg_validate_state);
reg_state->uses = _mesa_set_create(reg_state, _mesa_hash_pointer,
   _mesa_key_pointer_equal);
+   reg_state->if_uses = _mesa_set_create(reg_state, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
reg_state->defs = _mesa_set_create(reg_state, _mesa_hash_pointer,
   _mesa_key_pointer_equal);
 
@@ -702,6 +713,21 @@ postvalidate_reg_decl(nir_register *reg, validate_state 
*state)
   abort();
}
 
+   if (reg_state->if_uses->entries != reg->if_uses->entries) {
+  printf("extra entries in register if_uses:\n");
+  struct set_entry *entry;
+  set_foreach(reg->if_uses, entry) {
+ struct set_entry *entry2 =
+_mesa_set_search(reg_state->if_uses, entry->key);
+
+ if (entry2 == NULL) {
+printf("%p\n", entry->key);
+ }
+  }
+
+  abort();
+   }
+
if (reg_state->defs->entries 

[Mesa-dev] [PATCH v2 08/12] nir/from_ssa: Don't try to read an invalid instruction

2015-02-09 Thread Jason Ekstrand
---
 src/glsl/nir/nir_from_ssa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c
index 3625237..7c50095 100644
--- a/src/glsl/nir/nir_from_ssa.c
+++ b/src/glsl/nir/nir_from_ssa.c
@@ -272,7 +272,7 @@ get_parallel_copy_at_end_of_block(nir_block *block)
if (last_instr->type == nir_instr_type_jump)
   last_instr = nir_instr_prev(last_instr);
 
-   if (last_instr->type == nir_instr_type_parallel_copy)
+   if (last_instr && last_instr->type == nir_instr_type_parallel_copy)
   return nir_instr_as_parallel_copy(last_instr);
else
   return NULL;
-- 
2.2.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 05/12] nir: Properly clean up CF nodes when we remove them

2015-02-09 Thread Jason Ekstrand
Previously, if you remved a CF node that still had instructions in it, none
of the use/def information from those instructions would get cleaned up.
Also, we weren't removing if statements from the if_uses of the
corresponding register or SSA def.  This commit fixes both of these
problems
---
 src/glsl/nir/nir.c | 54 ++
 1 file changed, 54 insertions(+)

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 9a88bd3..8ea7bb5 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1148,6 +1148,58 @@ stitch_blocks(nir_block *before, nir_block *after)
exec_node_remove(&after->cf_node.node);
 }
 
+static void
+remove_defs_uses(nir_instr *instr);
+
+static void
+cleanup_cf_node(nir_cf_node *node)
+{
+   switch (node->type) {
+   case nir_cf_node_block: {
+  nir_block *block = nir_cf_node_as_block(node);
+  /* We need to walk the instructions and clean up defs/uses */
+  nir_foreach_instr(block, instr)
+ remove_defs_uses(instr);
+  break;
+   }
+
+   case nir_cf_node_if: {
+  nir_if *if_stmt = nir_cf_node_as_if(node);
+  foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list)
+ cleanup_cf_node(child);
+  foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list)
+ cleanup_cf_node(child);
+
+  struct set *if_uses;
+  if (if_stmt->condition.is_ssa) {
+ if_uses = if_stmt->condition.ssa->if_uses;
+  } else {
+ if_uses = if_stmt->condition.reg.reg->if_uses;
+  }
+
+  struct set_entry *entry = _mesa_set_search(if_uses, if_stmt);
+  assert(entry);
+  _mesa_set_remove(if_uses, entry);
+  break;
+   }
+
+   case nir_cf_node_loop: {
+  nir_loop *loop = nir_cf_node_as_loop(node);
+  foreach_list_typed(nir_cf_node, child, node, &loop->body)
+ cleanup_cf_node(child);
+  break;
+   }
+   case nir_cf_node_function: {
+  nir_function_impl *impl = nir_cf_node_as_function(node);
+  foreach_list_typed(nir_cf_node, child, node, &impl->body)
+ cleanup_cf_node(child);
+  break;
+   }
+   default:
+  unreachable("Invalid CF node type");
+   }
+}
+
 void
 nir_cf_node_remove(nir_cf_node *node)
 {
@@ -1175,6 +1227,8 @@ nir_cf_node_remove(nir_cf_node *node)
   exec_node_remove(&node->node);
   stitch_blocks(before_block, after_block);
}
+
+   cleanup_cf_node(node);
 }
 
 static bool
-- 
2.2.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 12/12] i965/nir: Use Global Code Motion

2015-02-09 Thread Jason Ekstrand
v2: Do GCM after dead code elimination
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index da81b61..a0e6b1d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -44,6 +44,8 @@ nir_optimize(nir_shader *nir)
   nir_validate_shader(nir);
   progress |= nir_opt_dce(nir);
   nir_validate_shader(nir);
+  nir_opt_gcm(nir);
+  nir_validate_shader(nir);
   progress |= nir_opt_cse(nir);
   nir_validate_shader(nir);
   progress |= nir_opt_peephole_select(nir);
-- 
2.2.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 04/12] nir: use nir_foreach_ssa_def for indexing ssa defs

2015-02-09 Thread Jason Ekstrand
This is both simpler and more correct.  The old code didn't properly index
load_const instructions.

Reviewed-by: Connor Abbott 
---
 src/glsl/nir/nir.c | 26 --
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 10e6ed3..9a88bd3 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -2003,37 +2003,19 @@ nir_index_blocks(nir_function_impl *impl)
 }
 
 static void
-index_ssa_def(nir_ssa_def *def, unsigned *index)
+index_ssa_def_cb(nir_ssa_def *def, void *state)
 {
+   unsigned *index = (unsigned *) state;
def->index = (*index)++;
-}
 
-static bool
-index_ssa_def_cb(nir_dest *dest, void *state)
-{
-   unsigned *index = (unsigned *) state;
-   if (dest->is_ssa)
-  index_ssa_def(&dest->ssa, index);
return true;
 }
 
-static void
-index_ssa_undef(nir_ssa_undef_instr *instr, unsigned *index)
-{
-   index_ssa_def(&instr->def, index);
-}
-
 static bool
 index_ssa_block(nir_block *block, void *state)
 {
-   unsigned *index = (unsigned *) state;
-
-   nir_foreach_instr(block, instr) {
-  if (instr->type == nir_instr_type_ssa_undef)
- index_ssa_undef(nir_instr_as_ssa_undef(instr), index);
-  else
- nir_foreach_dest(instr, index_ssa_def_cb, state);
-   }
+   nir_foreach_instr(block, instr)
+  nir_foreach_ssa_def(instr, index_ssa_def_cb, state);
 
return true;
 }
-- 
2.2.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 01/12] nir/dominance: Expose the dominance intersection function

2015-02-09 Thread Jason Ekstrand
Being able to find the least common anscestor in the dominance tree is a
useful thing that we may want to do in other passes.  In particular, we
need it for GCM.

v2: Handle NULL inputs by returning the other block
---
 src/glsl/nir/nir.h   |  2 ++
 src/glsl/nir/nir_dominance.c | 22 ++
 2 files changed, 24 insertions(+)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 4cb2e92..886dcd2 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1500,6 +1500,8 @@ static inline void nir_validate_shader(nir_shader 
*shader) { }
 void nir_calc_dominance_impl(nir_function_impl *impl);
 void nir_calc_dominance(nir_shader *shader);
 
+nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
+
 void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
 void nir_dump_dom_tree(nir_shader *shader, FILE *fp);
 
diff --git a/src/glsl/nir/nir_dominance.c b/src/glsl/nir/nir_dominance.c
index 67fdcc6..831b9a3 100644
--- a/src/glsl/nir/nir_dominance.c
+++ b/src/glsl/nir/nir_dominance.c
@@ -212,6 +212,28 @@ nir_calc_dominance(nir_shader *shader)
}
 }
 
+/**
+ * Computes the least common anscestor of two blocks.  If one of the blocks
+ * is null, the other block is returned.
+ */
+nir_block *
+nir_dominance_lca(nir_block *b1, nir_block *b2)
+{
+   if (b1 == NULL)
+  return b2;
+
+   if (b2 == NULL)
+  return b1;
+
+   assert(nir_cf_node_get_function(&b1->cf_node) ==
+  nir_cf_node_get_function(&b2->cf_node));
+
+   assert(nir_cf_node_get_function(&b1->cf_node)->valid_metadata &
+  nir_metadata_dominance);
+
+   return intersect(b1, b2);
+}
+
 static bool
 dump_block_dom(nir_block *block, void *state)
 {
-- 
2.2.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 02/12] nir/dominance: Add a constant-time mechanism for comparing blocks

2015-02-09 Thread Jason Ekstrand
This is mostly thanks to Connor.  The idea is to do a depth-first search
that computes pre and post indices for all the blocks.  We can then figure
out if one block dominates another in constant time by two simple
comparison operations.

Reviewed-by: Connor Abbott 
---
 src/glsl/nir/nir.h   |  9 +
 src/glsl/nir/nir_dominance.c | 30 ++
 2 files changed, 39 insertions(+)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 886dcd2..5d84343 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1135,6 +1135,14 @@ typedef struct nir_block {
/* Set of nir_block's on the dominance frontier of this block */
struct set *dom_frontier;
 
+   /*
+* These two indices have the property that dom_{pre,post}_index for each
+* child of this block in the dominance tree will always be between
+* dom_pre_index and dom_post_index for this block, which makes testing if
+* a given block is dominated by another block an O(1) operation.
+*/
+   unsigned dom_pre_index, dom_post_index;
+
/* live in and out for this block; used for liveness analysis */
BITSET_WORD *live_in;
BITSET_WORD *live_out;
@@ -1501,6 +1509,7 @@ void nir_calc_dominance_impl(nir_function_impl *impl);
 void nir_calc_dominance(nir_shader *shader);
 
 nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
+bool nir_block_dominates(nir_block *parent, nir_block *child);
 
 void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
 void nir_dump_dom_tree(nir_shader *shader, FILE *fp);
diff --git a/src/glsl/nir/nir_dominance.c b/src/glsl/nir/nir_dominance.c
index 831b9a3..d69e4f4 100644
--- a/src/glsl/nir/nir_dominance.c
+++ b/src/glsl/nir/nir_dominance.c
@@ -177,6 +177,17 @@ calc_dom_children(nir_function_impl* impl)
nir_foreach_block(impl, block_add_child, NULL);
 }
 
+static void
+calc_dfs_indicies(nir_block *block, unsigned *index)
+{
+   block->dom_pre_index = (*index)++;
+
+   for (unsigned i = 0; i < block->num_dom_children; i++)
+  calc_dfs_indicies(block->dom_children[i], index);
+
+   block->dom_post_index = (*index)++;
+}
+
 void
 nir_calc_dominance_impl(nir_function_impl *impl)
 {
@@ -201,6 +212,9 @@ nir_calc_dominance_impl(nir_function_impl *impl)
impl->start_block->imm_dom = NULL;
 
calc_dom_children(impl);
+
+   unsigned dfs_index = 0;
+   calc_dfs_indicies(impl->start_block, &dfs_index);
 }
 
 void
@@ -234,6 +248,22 @@ nir_dominance_lca(nir_block *b1, nir_block *b2)
return intersect(b1, b2);
 }
 
+/**
+ * Returns true if parent dominates child
+ */
+bool
+nir_block_dominates(nir_block *parent, nir_block *child)
+{
+   assert(nir_cf_node_get_function(&parent->cf_node) ==
+  nir_cf_node_get_function(&child->cf_node));
+
+   assert(nir_cf_node_get_function(&parent->cf_node)->valid_metadata &
+  nir_metadata_dominance);
+
+   return child->dom_pre_index >= parent->dom_pre_index &&
+  child->dom_post_index <= parent->dom_post_index;
+}
+
 static bool
 dump_block_dom(nir_block *block, void *state)
 {
-- 
2.2.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 84570] Borderlands 2/Pre-Sequel: Constant frame rate drops while playing; really bad with additionl lighting

2015-02-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=84570

--- Comment #32 from Kai  ---
(In reply to Michel Dänzer from comment #31)
> (In reply to Kai from comment #30)
> > Michel, is there any chance attachment 107544 will be
> > part of 3.18?
> 
> No, but it's in Alex's queue for 3.19.

3.19 has been released. As I'm not sure, what the situation with Intel GPUs is,
it's not clear to me, whether this bug should be closed, renamed or duplicated
(with this one being closed afterwards for Radeon GPUs and the other renamed to
Intel: …). But for me the game ran pretty well over the past weeks with the
various RC versions of 3.19. Michel/Tapani, what do you think?

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 84570] Borderlands 2/Pre-Sequel: Constant frame rate drops while playing; really bad with additionl lighting

2015-02-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=84570

--- Comment #33 from Kenneth Graunke  ---
Aspyr's latest release contains bugfixes related to this, and that's solved the
problem on Intel.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Expected wide line rendering with clipping

2015-02-09 Thread Iago Toral
On Fri, 2015-02-06 at 21:27 +0100, Roland Scheidegger wrote:
> Am 06.02.2015 um 13:11 schrieb Iago Toral:
> > Hi,
> > 
> > Eduardo and I have been looking into a few dEQP test failures that deal
> > with wide line rendering. There are a few of them that fail because of
> > how clipping is implemented for this case.
> > 
> > The problem in these cases seems to be that the hw renders the wide line
> > as a parallelogram so that if an edge of the parallelogram is partially
> > clipped, the other parallel edge will also be clipped at the same
> > height/width so that the resulting wide line stays a parallelogram. The
> > dEQP renders as if a wide line where a collection of 1px lines, so
> > cliping one edge of the resulting line does not have implications for
> > the other edge.
> > 
> > This ASCII art illustrates the problem (* represent line pixels, |
> > represents the viewport's rightmost edge):
> > 
> >  Expected by dEQP  i965 rendering
> >  |  |
> > *|  |
> >**|  |
> >   ***|  |
> >  |  |
> >  |  |
> >  |  |
> > 
> > We can make the rendering result match dEQP's expectation by enabling
> > the GuardBand in the clip stage (GEN6_CLIP_GB_TEST). This is being
> > disabled by the driver for gen7 in this scenario because the underlying
> > framebuffer surface is larger than the viewport, and per the comment in
> > the code, in gen7 that would make it render beyond the viewport limits
> > in that surface, while it notes that gen8 hw fixes this. So I guess that
> > we do not want to do this in any case in gen7.
> > 
> > Then, I have been reviewing the OpenGL specs to see if they clarify what
> > should happen when clipping wide lines and I think the spec does not
> > make a clear point, since when it talks about line clipping it does not
> > cover the case of wide lines specifically:
> > 
> > "13.5. PRIMITIVE CLIPPING
> > ...
> > If part of the line segment lies in the volume and part lies outside,
> > then the line segment is clipped and new vertex coordinates are computed
> > for one or both vertices. A clipped line segment endpoint lies on both
> > the original line segment and the boundary of the clip volume.
> > ...
> > "
> > 
> > The above description is clear for 1px lines, but where should the new
> > vertex be generated exactly for wide lines, where many points could be
> > at the boundary of the clip volume? Is any point valid? (that would mean
> > the dEQP test is bogus because there are multiple accepted renderings),
> > should the new vertex be exactly at the center of the line? (that would
> > make both deqp and intel rendering bogus).
> > 
> > Then there is also this comment in "14.5.2.2 Wide Lines" inside the
> > non-AA line rasterization chapter:
> > 
> > "Non-antialiased line segments of width other than one are rasterized by
> > off-setting them in the minor direction (for an x-major line, the minor
> > direction is y, and for a y-major line, the minor direction is x) and
> > replicating fragments in the minor direction (see figure 14.3). Let w be
> > the width rounded to the nearest integer (if w = 0, then it is as if w =
> > 1). If the line segment has endpoints given by (x 0 , y 0 ) and (x 1 , y
> > 1 ) in window coordinates, the segment with endpoints (x 0 , y 0 − (w −
> > 1)/2) and (x 1 , y 1 − (w − 1)/2) is rasterized, but instead of a single
> > fragment, a column of fragments of height w (a row of fragments of
> > length w for a y-major segment) is produced at each x (y for y-major)
> > location. The lowest fragment of this column is the fragment that would
> > be produced by rasterizing the segment of width 1 with the modified
> > coordinates."
> > 
> > This "suggests" that wide line rendering should be equivalent to
> > rendering multiple lines of width=1 and if we actually did that then the
> > expected rendering result after clipping would be what dEQP expects.
> > 
> > I think the whole thing isn't 100% clear. Does anyone have any thoughts
> > on what should be considered correct behavior in this case?
> > 
> > Iago
> > 
> > 
> 
> I am quite sure that i965 rendering is what (at least legacy GL)
> expects. This is because the wide-line expansion is defined to be
> happening at rasterization time, i.e. past clipping. Thus, at clipping
> time, no matter if a line is wide or not, it is still really a line,
> hence clipping will produce a single new endpoint at the viewport edge
> and in rasterization this is then what will be used as the endpoint.

That makes sense, but in that case I think i965 rendering would still be
incorrect, the expected result would look something like this:

  |
  |
**|
   ***|
  |
  |
  |

However, as far as I can tell, since i965 hardware renders wide lines as
parallelograms, it can't render something like this

[Mesa-dev] [PATCH v4] glsl: Linking support for doubles

2015-02-09 Thread Ilia Mirkin
From: Dave Airlie 

Signed-off-by: Dave Airlie 
Reviewed-by: Ian Romanick 
---

Removed slot multiplication in link_varyings, since component_slots
now takes care of it.

 src/glsl/link_uniform_initializers.cpp | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/glsl/link_uniform_initializers.cpp 
b/src/glsl/link_uniform_initializers.cpp
index f6a60bc..6907384 100644
--- a/src/glsl/link_uniform_initializers.cpp
+++ b/src/glsl/link_uniform_initializers.cpp
@@ -75,6 +75,11 @@ copy_constant_to_storage(union gl_constant_value *storage,
   case GLSL_TYPE_FLOAT:
 storage[i].f = val->value.f[i];
 break;
+  case GLSL_TYPE_DOUBLE:
+ /* XXX need to check on big-endian */
+ storage[i * 2].u = *(uint32_t *)&val->value.d[i];
+ storage[i * 2 + 1].u = *(((uint32_t *)&val->value.d[i]) + 1);
+ break;
   case GLSL_TYPE_BOOL:
 storage[i].b = val->value.b[i] ? boolean_true : 0;
 break;
@@ -200,6 +205,7 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program 
*prog,
 val->array_elements[0]->type->base_type;
   const unsigned int elements = val->array_elements[0]->type->components();
   unsigned int idx = 0;
+  unsigned dmul = (base_type == GLSL_TYPE_DOUBLE) ? 2 : 1;
 
   assert(val->type->length >= storage->array_elements);
   for (unsigned int i = 0; i < storage->array_elements; i++) {
@@ -209,7 +215,7 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program 
*prog,
   elements,
   boolean_true);
 
-idx += elements;
+idx += elements * dmul;
   }
} else {
   copy_constant_to_storage(storage->storage,
-- 
2.0.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4] glsl: Uniform linking support for doubles

2015-02-09 Thread Ilia Mirkin
From: Dave Airlie 

Signed-off-by: Dave Airlie 
---

Removed now-unnecessary component_slots multiplication, check the base
type without array.

 src/glsl/link_uniforms.cpp | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index de2f6c9..88d9b74 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -608,7 +608,11 @@ private:
 }
 
 if (type->without_array()->is_matrix()) {
-   this->uniforms[id].matrix_stride = 16;
+const unsigned N = type->without_array()->base_type == 
GLSL_TYPE_DOUBLE ? 8 : 4;
+const unsigned items = row_major ? type->matrix_columns : 
type->vector_elements;
+
+assert(items <= 4);
+this->uniforms[id].matrix_stride = glsl_align(items * N, 16);
this->uniforms[id].row_major = row_major;
 } else {
this->uniforms[id].matrix_stride = 0;
-- 
2.0.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4] glsl: Add double builtin type generation

2015-02-09 Thread Ilia Mirkin
From: Dave Airlie 

Signed-off-by: Dave Airlie 
Reviewed-by: Matt Turner 
Reviewed-by: Ian Romanick 
---

Made component_slots() return 2 instead of 1 for double. Also added to
list for uniform_locations(), returning 1.

 src/glsl/builtin_type_macros.h |  16 ++
 src/glsl/builtin_types.cpp |  30 +++
 src/glsl/glsl_parser_extras.h  |   5 ++
 src/glsl/glsl_types.cpp| 112 +
 src/glsl/glsl_types.h  |  19 ++-
 5 files changed, 159 insertions(+), 23 deletions(-)

diff --git a/src/glsl/builtin_type_macros.h b/src/glsl/builtin_type_macros.h
index 236e1ce..bf74eb3 100644
--- a/src/glsl/builtin_type_macros.h
+++ b/src/glsl/builtin_type_macros.h
@@ -64,6 +64,22 @@ DECL_TYPE(mat3x4, GL_FLOAT_MAT3x4, GLSL_TYPE_FLOAT, 4, 3)
 DECL_TYPE(mat4x2, GL_FLOAT_MAT4x2, GLSL_TYPE_FLOAT, 2, 4)
 DECL_TYPE(mat4x3, GL_FLOAT_MAT4x3, GLSL_TYPE_FLOAT, 3, 4)
 
+DECL_TYPE(double,  GL_DOUBLE,GLSL_TYPE_DOUBLE, 1, 1)
+DECL_TYPE(dvec2,   GL_DOUBLE_VEC2,   GLSL_TYPE_DOUBLE, 2, 1)
+DECL_TYPE(dvec3,   GL_DOUBLE_VEC3,   GLSL_TYPE_DOUBLE, 3, 1)
+DECL_TYPE(dvec4,   GL_DOUBLE_VEC4,   GLSL_TYPE_DOUBLE, 4, 1)
+
+DECL_TYPE(dmat2,   GL_DOUBLE_MAT2,   GLSL_TYPE_DOUBLE, 2, 2)
+DECL_TYPE(dmat3,   GL_DOUBLE_MAT3,   GLSL_TYPE_DOUBLE, 3, 3)
+DECL_TYPE(dmat4,   GL_DOUBLE_MAT4,   GLSL_TYPE_DOUBLE, 4, 4)
+
+DECL_TYPE(dmat2x3, GL_DOUBLE_MAT2x3, GLSL_TYPE_DOUBLE, 3, 2)
+DECL_TYPE(dmat2x4, GL_DOUBLE_MAT2x4, GLSL_TYPE_DOUBLE, 4, 2)
+DECL_TYPE(dmat3x2, GL_DOUBLE_MAT3x2, GLSL_TYPE_DOUBLE, 2, 3)
+DECL_TYPE(dmat3x4, GL_DOUBLE_MAT3x4, GLSL_TYPE_DOUBLE, 4, 3)
+DECL_TYPE(dmat4x2, GL_DOUBLE_MAT4x2, GLSL_TYPE_DOUBLE, 2, 4)
+DECL_TYPE(dmat4x3, GL_DOUBLE_MAT4x3, GLSL_TYPE_DOUBLE, 3, 4)
+
 DECL_TYPE(sampler1D, GL_SAMPLER_1D,   
GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D,   0, 0, GLSL_TYPE_FLOAT)
 DECL_TYPE(sampler2D, GL_SAMPLER_2D,   
GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_2D,   0, 0, GLSL_TYPE_FLOAT)
 DECL_TYPE(sampler3D, GL_SAMPLER_3D,   
GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_3D,   0, 0, GLSL_TYPE_FLOAT)
diff --git a/src/glsl/builtin_types.cpp b/src/glsl/builtin_types.cpp
index 10fac0f..fef86df 100644
--- a/src/glsl/builtin_types.cpp
+++ b/src/glsl/builtin_types.cpp
@@ -159,6 +159,20 @@ const static struct builtin_type_versions {
T(mat4x2,  120, 300)
T(mat4x3,  120, 300)
 
+   T(double,  400, 999)
+   T(dvec2,   400, 999)
+   T(dvec3,   400, 999)
+   T(dvec4,   400, 999)
+   T(dmat2,   400, 999)
+   T(dmat3,   400, 999)
+   T(dmat4,   400, 999)
+   T(dmat2x3, 400, 999)
+   T(dmat2x4, 400, 999)
+   T(dmat3x2, 400, 999)
+   T(dmat3x4, 400, 999)
+   T(dmat4x2, 400, 999)
+   T(dmat4x3, 400, 999)
+
T(sampler1D,   110, 999)
T(sampler2D,   110, 100)
T(sampler3D,   110, 300)
@@ -361,5 +375,21 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state 
*state)
if (state->ARB_shader_atomic_counters_enable) {
   add_type(symbols, glsl_type::atomic_uint_type);
}
+
+   if (state->ARB_gpu_shader_fp64_enable) {
+  add_type(symbols, glsl_type::double_type);
+  add_type(symbols, glsl_type::dvec2_type);
+  add_type(symbols, glsl_type::dvec3_type);
+  add_type(symbols, glsl_type::dvec4_type);
+  add_type(symbols, glsl_type::dmat2_type);
+  add_type(symbols, glsl_type::dmat3_type);
+  add_type(symbols, glsl_type::dmat4_type);
+  add_type(symbols, glsl_type::dmat2x3_type);
+  add_type(symbols, glsl_type::dmat2x4_type);
+  add_type(symbols, glsl_type::dmat3x2_type);
+  add_type(symbols, glsl_type::dmat3x4_type);
+  add_type(symbols, glsl_type::dmat4x2_type);
+  add_type(symbols, glsl_type::dmat4x3_type);
+   }
 }
 /** @} */
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index dafee4e..ea53270 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -205,6 +205,11 @@ struct _mesa_glsl_parse_state {
  || EXT_separate_shader_objects_enable;
}
 
+   bool has_double() const
+   {
+  return ARB_gpu_shader_fp64_enable || is_version(400, 0);
+   }
+
void process_version_directive(YYLTYPE *locp, int version,
   const char *ident);
 
diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp
index b4223f4..f6c990f 100644
--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -194,6 +194,22 @@ glsl_type::contains_integer() const
 }
 
 bool
+glsl_type::contains_double() const
+{
+   if (this->is_array()) {
+  return this->fields.array->contains_double(

[Mesa-dev] [PATCH v4] mesa: add double uniform support. (v5)

2015-02-09 Thread Ilia Mirkin
From: Dave Airlie 

This adds support for the new uniform interfaces
from ARB_gpu_shader_fp64.

v2:
support ARB_separate_shader_objects ProgramUniform*d* (Ian)
don't allow boolean uniforms to be updated (issue 15) (Ian)

v3: fix size_mul
v4: Teach uniform update to take into account double precision (Topi)
v5: add transpose for double case (Ilia)

Signed-off-by: Dave Airlie 
---

Changed ir_to_mesa to properly take into account the fact that a dvec2
takes up a single slot. Also add missed dmul to index on src storage
array.

 src/mesa/main/uniform_query.cpp |  46 +++---
 src/mesa/main/uniforms.c| 185 
 src/mesa/main/uniforms.h|   3 +-
 src/mesa/program/ir_to_mesa.cpp |  26 +-
 4 files changed, 228 insertions(+), 32 deletions(-)

diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
index d36f506..40327fb 100644
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -469,6 +469,9 @@ log_uniform(const void *values, enum glsl_base_type 
basicType,
   case GLSL_TYPE_FLOAT:
 printf("%g ", v[i].f);
 break;
+  case GLSL_TYPE_DOUBLE:
+ printf("%g ", *(double* )&v[i * 2].f);
+ break;
   default:
 assert(!"Should not get here.");
 break;
@@ -529,11 +532,12 @@ _mesa_propagate_uniforms_to_driver_storage(struct 
gl_uniform_storage *uni,
 */
const unsigned components = MAX2(1, uni->type->vector_elements);
const unsigned vectors = MAX2(1, uni->type->matrix_columns);
+   const int dmul = uni->type->base_type == GLSL_TYPE_DOUBLE ? 2 : 1;
 
/* Store the data in the driver's requested type in the driver's storage
 * areas.
 */
-   unsigned src_vector_byte_stride = components * 4;
+   unsigned src_vector_byte_stride = components * 4 * dmul;
 
for (i = 0; i < uni->num_driver_storage; i++) {
   struct gl_uniform_driver_storage *const store = &uni->driver_storage[i];
@@ -541,7 +545,7 @@ _mesa_propagate_uniforms_to_driver_storage(struct 
gl_uniform_storage *uni,
   const unsigned extra_stride =
 store->element_stride - (vectors * store->vector_stride);
   const uint8_t *src =
-(uint8_t *) (&uni->storage[array_index * (components * vectors)].i);
+(uint8_t *) (&uni->storage[array_index * (dmul * components * 
vectors)].i);
 
 #if 0
   printf("%s: %p[%d] components=%u vectors=%u count=%u vector_stride=%u "
@@ -608,6 +612,7 @@ _mesa_uniform(struct gl_context *ctx, struct 
gl_shader_program *shProg,
   unsigned src_components)
 {
unsigned offset;
+   int size_mul = basicType == GLSL_TYPE_DOUBLE ? 2 : 1;
 
struct gl_uniform_storage *const uni =
   validate_uniform_parameters(ctx, shProg, location, count,
@@ -623,7 +628,7 @@ _mesa_uniform(struct gl_context *ctx, struct 
gl_shader_program *shProg,
bool match;
switch (uni->type->base_type) {
case GLSL_TYPE_BOOL:
-  match = true;
+  match = (basicType != GLSL_TYPE_DOUBLE);
   break;
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
@@ -710,8 +715,8 @@ _mesa_uniform(struct gl_context *ctx, struct 
gl_shader_program *shProg,
/* Store the data in the "actual type" backing storage for the uniform.
 */
if (!uni->type->is_boolean()) {
-  memcpy(&uni->storage[components * offset], values,
-sizeof(uni->storage[0]) * components * count);
+  memcpy(&uni->storage[size_mul * components * offset], values,
+sizeof(uni->storage[0]) * components * count * size_mul);
} else {
   const union gl_constant_value *src =
 (const union gl_constant_value *) values;
@@ -808,13 +813,14 @@ extern "C" void
 _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
 GLuint cols, GLuint rows,
  GLint location, GLsizei count,
- GLboolean transpose, const GLfloat *values)
+ GLboolean transpose,
+ const GLvoid *values, GLenum type)
 {
unsigned offset;
unsigned vectors;
unsigned components;
unsigned elements;
-
+   int size_mul;
struct gl_uniform_storage *const uni =
   validate_uniform_parameters(ctx, shProg, location, count,
   &offset, "glUniformMatrix");
@@ -827,6 +833,9 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct 
gl_shader_program *shProg,
   return;
}
 
+   assert(type == GL_FLOAT || type == GL_DOUBLE);
+   size_mul = type == GL_DOUBLE ? 2 : 1;
+
assert(!uni->type->is_sampler());
vectors = uni->type->matrix_columns;
components = uni->type->vector_elements;
@@ -852,7 +861,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct 
gl_shader_program *shProg,
}
 
if (unlikely(ctx->_Shader->Flags & GLSL_UNIFORMS)) {
-  log_uniform(values, GLSL_TYPE_FLOAT, components, vectors, count,
+  log_uniform(values, uni->type->base_type, components, vectors, count,
  

[Mesa-dev] [PATCH v4] glsl: lower double optional passes (v2)

2015-02-09 Thread Ilia Mirkin
From: Dave Airlie 

These lowering passes are optional for the backend to request, currently
the TGSI softpipe backend most likely the r600g backend would want to use
these passes as is. They aim to hit the gallium opcodes from the standard
rounding/truncation functions.

v2: also lower floor in mod_to_floor

Signed-off-by: Dave Airlie 
---

Earlier changes messed things up... and went untested because nvc0
doesn't use this lowering pass. Re-ran on softpipe, works fine again.

 src/glsl/ir_optimization.h  |   1 +
 src/glsl/lower_instructions.cpp | 175 
 2 files changed, 176 insertions(+)

diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 912d910..9f91e2f 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -41,6 +41,7 @@
 #define CARRY_TO_ARITH 0x200
 #define BORROW_TO_ARITH0x400
 #define SAT_TO_CLAMP   0x800
+#define DOPS_TO_DFRAC  0x1000
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 8994aad..c63c018 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -42,6 +42,7 @@
  * - CARRY_TO_ARITH
  * - BORROW_TO_ARITH
  * - SAT_TO_CLAMP
+ * - DOPS_TO_DFRAC
  *
  * SUB_TO_ADD_NEG:
  * ---
@@ -112,6 +113,9 @@
  * -
  * Converts ir_unop_saturate into min(max(x, 0.0), 1.0)
  *
+ * DOPS_TO_DFRAC:
+ * --
+ * Converts double trunc, ceil, floor, round to fract
  */
 
 #include "main/core.h" /* for M_LOG2E */
@@ -151,6 +155,11 @@ private:
void sat_to_clamp(ir_expression *);
void double_dot_to_fma(ir_expression *);
void double_lrp(ir_expression *);
+   void dceil_to_dfrac(ir_expression *);
+   void dfloor_to_dfrac(ir_expression *);
+   void dround_even_to_dfrac(ir_expression *);
+   void dtrunc_to_dfrac(ir_expression *);
+   void dsign_to_csel(ir_expression *);
 };
 
 } /* anonymous namespace */
@@ -315,6 +324,9 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
ir_expression *const floor_expr =
   new(ir) ir_expression(ir_unop_floor, x->type, div_expr);
 
+   if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
+  dfloor_to_dfrac(floor_expr);
+
ir_expression *const mul_expr =
   new(ir) ir_expression(ir_binop_mul,
 new(ir) ir_dereference_variable(y),
@@ -578,6 +590,145 @@ lower_instructions_visitor::double_lrp(ir_expression *ir)
this->progress = true;
 }
 
+void
+lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir)
+{
+   /*
+* frtemp = frac(x);
+* temp = sub(x, frtemp);
+* result = temp + ((frtemp != 0.0) ? 1.0 : 0.0);
+*/
+   ir_instruction &i = *base_ir;
+   ir_constant *zero = new(ir) ir_constant(0.0, 
ir->operands[0]->type->vector_elements);
+   ir_constant *one = new(ir) ir_constant(1.0, 
ir->operands[0]->type->vector_elements);
+   ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
+ ir_var_temporary);
+
+   i.insert_before(frtemp);
+   i.insert_before(assign(frtemp, fract(ir->operands[0])));
+
+   ir->operation = ir_binop_add;
+   ir->operands[0] = sub(ir->operands[0]->clone(ir, NULL), frtemp);
+   ir->operands[1] = csel(nequal(frtemp, zero), one, zero->clone(ir, NULL));
+
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir)
+{
+   /*
+* frtemp = frac(x);
+* result = sub(x, frtemp);
+*/
+   ir->operation = ir_binop_sub;
+   ir->operands[1] = fract(ir->operands[0]->clone(ir, NULL));
+
+   this->progress = true;
+}
+void
+lower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir)
+{
+   /*
+* insane but works
+* temp = x + 0.5;
+* frtemp = frac(temp);
+* t2 = sub(temp, frtemp);
+* if (frac(x) == 0.5)
+* result = frac(t2 * 0.5) == 0 ? t2 : t2 - 1;
+*  else
+* result = t2;
+
+*/
+   ir_instruction &i = *base_ir;
+   ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
+ ir_var_temporary);
+   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
+   ir_var_temporary);
+   ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2",
+   ir_var_temporary);
+   ir_constant *p5 = new(ir) ir_constant(0.5, 
ir->operands[0]->type->vector_elements);
+   ir_constant *one = new(ir) ir_constant(1.0, 
ir->operands[0]->type->vector_elements);
+   ir_constant *zero = new(ir) ir_constant(0.0, 
ir->operands[0]->type->vector_elements);
+
+   i.insert_before(temp);
+   i.insert_before(assign(temp, add(ir->operands[0], p5)));
+
+   i.insert_before(frtemp);
+   i.insert_before(assign(frtemp, fract(temp)));
+
+   i.insert_before(t2);
+   i.insert_before(assign(t2, sub(temp, frtemp)));
+
+   ir->operation = ir_triop_csel;
+   ir->op

Re: [Mesa-dev] Expected wide line rendering with clipping

2015-02-09 Thread Iago Toral
On Sat, 2015-02-07 at 13:41 +0100, Erik Faye-Lund wrote:
> On Fri, Feb 6, 2015 at 1:11 PM, Iago Toral  wrote:
> > Hi,
> >
> > Eduardo and I have been looking into a few dEQP test failures that deal
> > with wide line rendering. There are a few of them that fail because of
> > how clipping is implemented for this case.
> >
> > The problem in these cases seems to be that the hw renders the wide line
> > as a parallelogram so that if an edge of the parallelogram is partially
> > clipped, the other parallel edge will also be clipped at the same
> > height/width so that the resulting wide line stays a parallelogram. The
> > dEQP renders as if a wide line where a collection of 1px lines, so
> > cliping one edge of the resulting line does not have implications for
> > the other edge.
> >
> > This ASCII art illustrates the problem (* represent line pixels, |
> > represents the viewport's rightmost edge):
> >
> >  Expected by dEQP  i965 rendering
> >  |  |
> > *|  |
> >**|  |
> >   ***|  |
> >  |  |
> >  |  |
> >  |  |
> 
> Is that drawing correct? I would expect it to look like this:
> 
>   |  |
>  *|  |
> **|**|**
>***|   ***|*
>   |  |
>   |  |
>   |  |

Yes, it is correct. Your expectation makes sense if things happen as
Roland described (first we clip the 1px line, then we expand), but the
result in i965 looks more like what you would obtain if you do it the
other way around.

As I mentioned in my response to Roland, I think i965 can't render what
you expect (at least directly), the reason being that it renders wide
lines as parallelograms instead of multiple 1px lines.

> From the drawing, it looks like the line is exactly 45 degrees, so I
> believe this particular case is actually undefined, because of the
> following spec wording:

Right, that was just my drawing. The dEQP test case renders an x-major
line.

> "Line segment rasterization begins by characterizing the segment as
> either x-major or y-major. x-major line segments have slope in the
> closed interval [−1, 1]; all other line segments are y-major (slope is
> determined by the segment’s endpoints)."
> 
> A 45 degree line should have a slope of 1, and should in theory be
> identified as an x-major line, giving the result dEQP expects (i965 is
> in this case rendering the line as similar to y-major, except y-major
> rendering should also generate fragments outside the viewport, as
> pointed out in my drawing above). However, since we're exactly at the
> break-off point, previous calculations are allowed to be off by some
> small value, leaving the characterization undefined.
> 
> So I think in this case, both results are allowed, and the test is
> therefore somewhat bogus. If the line was tilted a tiny amount so it's
> guaranteed to be characterized as an x-major, then it should be
> correct again. Not having access to the tests myself, they might
> already have done this, though.
> 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 00/29] add fp64 support to mesa and glsl compiler

2015-02-09 Thread Ilia Mirkin
I just sent out a few updated patches (latest can be seen in
https://github.com/imirkin/mesa/commits/fp64-5) -- with all that in
place the mysterious failure in one of the initializer tests is gone,
and the constbuf locations being used correspond to the ones that we
expect instead of having *tons* of empty space.

On Sun, Feb 8, 2015 at 4:00 AM, Ilia Mirkin  wrote:
> This is v3 of the patchset which includes fixes based on Ian's
> feedback with the exception of his comment on patch 23 (glsl: validate
> output types for shader stages), i.e. whether it needs to exist at all
> and isn't that being checked elsewhere. I am unfortunately not
> familiar enough with the overall flow of things yet to work it
> out. Perhaps someone else can take a look... Tapani?
>
> Chris also had a suggestion for standardizing the size_mul/dmul/etc
> variable names... unfortunately various ones count different
> things. Some are 4/8, others are 1/2. I left them as is for now.
>
> If I missed anything from the existing feedback, please let me
> know. As before, this passes all current fp64 tests in piglit on nvc0
> with the exception of 4 initializer tests which Dave is looking at but
> is fairly confident the error is in st/mesa somewhere.
>
> Patches also available at https://github.com/imirkin/mesa/commits/fp64-4
>
> Dave Airlie (25):
>   glapi: add ARB_gpu_shader_fp64 (v2)
>   mesa: add ARB_gpu_shader_fp64 extension info (v2)
>   mesa: add mesa_type_is_double helper function (v2)
>   mesa: add double uniform support. (v5)
>   glsl: add ARB_gpu_shader_fp64 to the glsl extensions. (v2)
>   glsl: Add double builtin type generation
>   glsl: Uniform linking support for doubles
>   glsl/ir: Add builtin function support for doubles
>   glsl/ir: Add printing support for doubles
>   glsl/ir: Add cloning support for doubles
>   glsl/ir: Add builtin constant function support for doubles
>   glsl/ir: Add builder support for functions with double floats
>   glsl: Add support doubles in optimization passes
>   glsl: Add ubo lowering support for doubles
>   glsl/ast: Support double floats
>   glsl/parser: Support double floats
>   glsl/lexer: Support double floats
>   glsl: Support double inouts
>   glsl: Support double loop control
>   glsl: Linking support for doubles
>   glsl: add double support to lower_mat_op_to_vec
>   glsl: enable/disable certain lowering passes for doubles
>   glsl/lower_instructions: add double lowering passes
>   glsl: implement double builtin functions
>   glsl: lower double optional passes (v2)
>
> Ilia Mirkin (3):
>   glsl: Add double builtin type
>   glsl: add a lowering pass for frexp/ldexp with double arguments
>   glsl/tests: add DOUBLE/IMAGE types
>
> Tapani Pälli (1):
>   glsl: validate output types for shader stages
>
>  src/glsl/ast.h |   2 +
>  src/glsl/ast_function.cpp  |  66 +-
>  src/glsl/ast_to_hir.cpp|  78 ++-
>  src/glsl/builtin_functions.cpp | 751 
> ++---
>  src/glsl/builtin_type_macros.h |  16 +
>  src/glsl/builtin_types.cpp |  30 +
>  src/glsl/glcpp/glcpp-parse.y   |   3 +
>  src/glsl/glsl_lexer.ll |  31 +-
>  src/glsl/glsl_parser.yy|  33 +-
>  src/glsl/glsl_parser_extras.cpp|   5 +
>  src/glsl/glsl_parser_extras.h  |   7 +
>  src/glsl/glsl_types.cpp| 109 ++-
>  src/glsl/glsl_types.h  |  20 +-
>  src/glsl/ir.cpp| 111 ++-
>  src/glsl/ir.h  |  22 +
>  src/glsl/ir_builder.cpp|  23 +
>  src/glsl/ir_builder.h  |   5 +
>  src/glsl/ir_clone.cpp  |   1 +
>  src/glsl/ir_constant_expression.cpp| 247 ++-
>  src/glsl/ir_optimization.h |   2 +
>  src/glsl/ir_print_visitor.cpp  |  11 +
>  src/glsl/ir_set_program_inouts.cpp |  28 +-
>  src/glsl/ir_validate.cpp   |  72 +-
>  src/glsl/link_uniform_initializers.cpp |   8 +-
>  src/glsl/link_uniforms.cpp |   9 +-
>  src/glsl/link_varyings.cpp |   3 +-
>  src/glsl/loop_controls.cpp |   8 +-
>  src/glsl/lower_instructions.cpp| 520 +-
>  src/glsl/lower_mat_op_to_vec.cpp   |   2 +
>  src/glsl/lower_ubo_reference.cpp   |  18 +-
>  src/glsl/opt_algebraic.cpp |  26 +-
>  src/glsl/opt_constant_propagation.cpp  |   3 +
>  src/glsl/opt_minmax.cpp|  13 +
>  src/glsl/standalone_scaffolding.cpp|   1 +
>  src/glsl/tests/uniform_initializer_utils.cpp   |  12 +

Re: [Mesa-dev] Expected wide line rendering with clipping

2015-02-09 Thread Roland Scheidegger
Am 09.02.2015 um 09:53 schrieb Iago Toral:
> On Fri, 2015-02-06 at 21:27 +0100, Roland Scheidegger wrote:
>> Am 06.02.2015 um 13:11 schrieb Iago Toral:
>>> Hi,
>>>
>>> Eduardo and I have been looking into a few dEQP test failures that deal
>>> with wide line rendering. There are a few of them that fail because of
>>> how clipping is implemented for this case.
>>>
>>> The problem in these cases seems to be that the hw renders the wide line
>>> as a parallelogram so that if an edge of the parallelogram is partially
>>> clipped, the other parallel edge will also be clipped at the same
>>> height/width so that the resulting wide line stays a parallelogram. The
>>> dEQP renders as if a wide line where a collection of 1px lines, so
>>> cliping one edge of the resulting line does not have implications for
>>> the other edge.
>>>
>>> This ASCII art illustrates the problem (* represent line pixels, |
>>> represents the viewport's rightmost edge):
>>>
>>>  Expected by dEQP  i965 rendering
>>>  |  |
>>> *|  |
>>>**|  |
>>>   ***|  |
>>>  |  |
>>>  |  |
>>>  |  |
>>>
>>> We can make the rendering result match dEQP's expectation by enabling
>>> the GuardBand in the clip stage (GEN6_CLIP_GB_TEST). This is being
>>> disabled by the driver for gen7 in this scenario because the underlying
>>> framebuffer surface is larger than the viewport, and per the comment in
>>> the code, in gen7 that would make it render beyond the viewport limits
>>> in that surface, while it notes that gen8 hw fixes this. So I guess that
>>> we do not want to do this in any case in gen7.
>>>
>>> Then, I have been reviewing the OpenGL specs to see if they clarify what
>>> should happen when clipping wide lines and I think the spec does not
>>> make a clear point, since when it talks about line clipping it does not
>>> cover the case of wide lines specifically:
>>>
>>> "13.5. PRIMITIVE CLIPPING
>>> ...
>>> If part of the line segment lies in the volume and part lies outside,
>>> then the line segment is clipped and new vertex coordinates are computed
>>> for one or both vertices. A clipped line segment endpoint lies on both
>>> the original line segment and the boundary of the clip volume.
>>> ...
>>> "
>>>
>>> The above description is clear for 1px lines, but where should the new
>>> vertex be generated exactly for wide lines, where many points could be
>>> at the boundary of the clip volume? Is any point valid? (that would mean
>>> the dEQP test is bogus because there are multiple accepted renderings),
>>> should the new vertex be exactly at the center of the line? (that would
>>> make both deqp and intel rendering bogus).
>>>
>>> Then there is also this comment in "14.5.2.2 Wide Lines" inside the
>>> non-AA line rasterization chapter:
>>>
>>> "Non-antialiased line segments of width other than one are rasterized by
>>> off-setting them in the minor direction (for an x-major line, the minor
>>> direction is y, and for a y-major line, the minor direction is x) and
>>> replicating fragments in the minor direction (see figure 14.3). Let w be
>>> the width rounded to the nearest integer (if w = 0, then it is as if w =
>>> 1). If the line segment has endpoints given by (x 0 , y 0 ) and (x 1 , y
>>> 1 ) in window coordinates, the segment with endpoints (x 0 , y 0 − (w −
>>> 1)/2) and (x 1 , y 1 − (w − 1)/2) is rasterized, but instead of a single
>>> fragment, a column of fragments of height w (a row of fragments of
>>> length w for a y-major segment) is produced at each x (y for y-major)
>>> location. The lowest fragment of this column is the fragment that would
>>> be produced by rasterizing the segment of width 1 with the modified
>>> coordinates."
>>>
>>> This "suggests" that wide line rendering should be equivalent to
>>> rendering multiple lines of width=1 and if we actually did that then the
>>> expected rendering result after clipping would be what dEQP expects.
>>>
>>> I think the whole thing isn't 100% clear. Does anyone have any thoughts
>>> on what should be considered correct behavior in this case?
>>>
>>> Iago
>>>
>>>
>>
>> I am quite sure that i965 rendering is what (at least legacy GL)
>> expects. This is because the wide-line expansion is defined to be
>> happening at rasterization time, i.e. past clipping. Thus, at clipping
>> time, no matter if a line is wide or not, it is still really a line,
>> hence clipping will produce a single new endpoint at the viewport edge
>> and in rasterization this is then what will be used as the endpoint.
> 
> That makes sense, but in that case I think i965 rendering would still be
> incorrect, the expected result would look something like this:
> 
>   |
>   |
> **|
>***|
>   |
>   |
>   |
> 
> However, as far as I can tell, since i965 hardware renders wi

[Mesa-dev] [PATCH 3/3] dri/common: Fix returned value of __DRI2_RENDERER_PREFERRED_PROFILE

2015-02-09 Thread Andreas Boll
If the renderer supports the core profile the query returned incorrectly
0x8 as value, because it was using (1U << __DRI_API_OPENGL_CORE) for the
returned value.

The same happened with the compatibility profile. It returned 0x1
(1U << __DRI_API_OPENGL) instead of 0x2.

Used defines for the incorrect behavior:
   dri_interface.h: #define __DRI_API_OPENGL   0
   dri_interface.h: #define __DRI_API_OPENGL_CORE  3

Now we return GLX_CONTEXT_CORE_PROFILE_BIT_ARB (0x1) for a preferred
core context profile and GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB (0x2)
for a preferred compatibility context profile.

Cc: "10.3 10.4 10.5" 
Signed-off-by: Andreas Boll 
---

I've noticed this wrong behavior while testing Adam's glxinfo changes [1]
(glxinfo: Add support for GLX_MESA_query_renderer)

For reproducing this bug and/or testing this and the previous patch just run
  glxinfo | grep "Preferred profile"

Andreas.

[1] 
http://cgit.freedesktop.org/mesa/demos/commit/?id=999b6950c644266bb871e79438751bdba2fa2a08

 src/mesa/drivers/dri/common/utils.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/common/utils.c 
b/src/mesa/drivers/dri/common/utils.c
index ccdc971..08e0d79 100644
--- a/src/mesa/drivers/dri/common/utils.c
+++ b/src/mesa/drivers/dri/common/utils.c
@@ -522,7 +522,8 @@ driQueryRendererIntegerCommon(__DRIscreen *psp, int param, 
unsigned int *value)
}
case __DRI2_RENDERER_PREFERRED_PROFILE:
   value[0] = (psp->max_gl_core_version != 0)
- ? (1U << __DRI_API_OPENGL_CORE) : (1U << __DRI_API_OPENGL);
+ ? GLX_CONTEXT_CORE_PROFILE_BIT_ARB
+ : GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB;
   return 0;
case __DRI2_RENDERER_OPENGL_CORE_PROFILE_VERSION:
   value[0] = psp->max_gl_core_version / 10;
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] mesa: Redefine GLX_CONTEXT_{CORE|COMPATIBILITY}_PROFILE_BIT_ARB

2015-02-09 Thread Andreas Boll
GLX_MESA_query_renderer uses these values for its query
GLX_RENDERER_PREFERRED_PROFILE_MESA.  Since GLX header files may not
be available everywhere they need to be used, redefine them here.

This is needed for the next patch.

Cc: "10.3 10.4 10.5" 
Signed-off-by: Andreas Boll 
---

I hope this is the right solution to use these defines in
src/mesa/drivers/dri/common/utils.c since the above defines like
GLX_DONT_CARE or GLX_NONE do the same.

Andreas.

src/mesa/main/glheader.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h
index 7f7f9a3..3151f75 100644
--- a/src/mesa/main/glheader.h
+++ b/src/mesa/main/glheader.h
@@ -162,6 +162,12 @@ typedef void *GLeglImageOES;
 
 #define GLX_DONT_CARE  0x
 
+/* GLX_MESA_query_renderer uses these values for its query
+ * GLX_RENDERER_PREFERRED_PROFILE_MESA.  Since GLX header files may not be
+ * available everywhere they need to be used, redefine them here.
+ */
+#define GLX_CONTEXT_CORE_PROFILE_BIT_ARB 0x0001
+#define GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB 0x0002
 
 #ifdef __cplusplus
 }
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] dri/common: Update comment about driQueryRendererIntegerCommon

2015-02-09 Thread Andreas Boll
Since 87d3ae0b45b6b6bb50b583dafa55eb109449a005
driQueryRendererIntegerCommon handles __DRI2_RENDERER_PREFFERED_PROFILE
too.

Signed-off-by: Andreas Boll 
---
 src/mesa/drivers/dri/common/utils.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/common/utils.c 
b/src/mesa/drivers/dri/common/utils.c
index f2e63c0..ccdc971 100644
--- a/src/mesa/drivers/dri/common/utils.c
+++ b/src/mesa/drivers/dri/common/utils.c
@@ -485,6 +485,7 @@ driIndexConfigAttrib(const __DRIconfig *config, int index,
  * Currently only the following queries are supported by this function:
  *
  * - \c __DRI2_RENDERER_VERSION
+ * - \c __DRI2_RENDERER_PREFERRED_PROFILE
  * - \c __DRI2_RENDERER_OPENGL_CORE_PROFILE_VERSION
  * - \c __DRI2_RENDERER_OPENGL_COMPATIBLITY_PROFILE_VERSION
  * - \c __DRI2_RENDERER_ES_PROFILE_VERSION
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/32] i965/fs: Fix stack allocation of fs_inst and stop stealing src array provided on construction.

2015-02-09 Thread Francisco Jerez
Matt Turner  writes:

> On Fri, Feb 6, 2015 at 6:42 AM, Francisco Jerez  wrote:
>> Using 'ralloc*(this, ...)' is wrong if the object has automatic
>> storage or was allocated through any other means.  Use normal dynamic
>> memory instead.
>> ---
>
> I don't see any places we were allocating an fs_inst via a mechanism
> other than ralloc. Does one exist, or is this commit in preparation
> for doing so?
>
The latter, you're right that right now all fs_inst allocations are done
using ralloc.

> I understand that in general "stealing" the source array might be
> somewhat unexpected, but as far as I know we do this with
> load_payload, where it seems pretty clear. Is this just an issue of
> taste?

I don't think it's just a matter of taste.  It actually took me hours of
debugging to find out what was corrupting random memory in my program.
The cause turned out to be that I was passing a stack-allocated array as
the src argument of LOAD_PAYLOAD() (which gives no indication of the
argument having such particular memory semantics), the array is then
silently mutated by fs_inst::init() and bound to fs_inst:src with no
consideration of what the caller had in mind to do with it afterwards.
Later on my original stack allocation goes out of scope and the sources
of the instruction end up getting filled with garbage (and worst-case
the stack could get corrupted if someone modifies the sources of the
instruction later on).

You might argue against stack-allocating the source array, but it's by
far the most convenient (just see the number of lines of code saved in
the other fs_inst constructors by allocating the sources in the stack,
which allows you to use aggregate initializer syntax), cache-local and
memory-efficient (as the array allocation is released as soon as it goes
out of scope rather than letting it leak until the end of the
compilation).  For these reasons it's likely to happen again to someone
else in the future, I already learnt the lesson the hard way.

The other reason against using ralloc to allocate fs_insts is that it's
ridiculously memory-inefficient.  Each fs_inst allocation done using
ralloc requires 80 bytes (96 bytes in debug mode) of extra space for the
two header structures (one for the fs_inst itself and another one for
the source array), that's almost as much as the fs_inst structure itself
(104 bytes), and the whole thing leaks until the compilation terminates
even if as it frequently happens the instruction is eliminated along the
way by some optimization pass.


pgpHS9K2ZL3jy.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 14/32] i965/fs: Fix register coalesce not to lose track of the second half of 16-wide moves.

2015-02-09 Thread Francisco Jerez
Matt Turner  writes:

> On Fri, Feb 6, 2015 at 6:42 AM, Francisco Jerez  wrote:
>> Fixes rewrite by the register coalesce pass of references to
>> individual halves of 16-wide coalesced registers.
>> ---
>>  src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp | 8 ++--
>>  1 file changed, 6 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp 
>> b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
>> index 09f0fad..2a26a46 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
>> @@ -211,9 +211,13 @@ fs_visitor::register_coalesce()
>>  continue;
>>   }
>>   reg_to_offset[offset] = inst->dst.reg_offset;
>> - if (inst->src[0].width == 16)
>> -reg_to_offset[offset + 1] = inst->dst.reg_offset + 1;
>>   mov[offset] = inst;
>> +
>> + if (inst->exec_size * type_sz(inst->src[0].type) > REG_SIZE) {
>> +reg_to_offset[offset + 1] = inst->dst.reg_offset + 1;
>> +mov[offset + 1] = inst;
>> + }
>> +
>>   channels_remaining -= inst->regs_written;
>>}
>>
>> --
>> 2.1.3
>
> I can believe it. It would help me to understand if we had an example
> of a sequence of instructions that this code didn't handle properly.

The problem is in the "rewrite" phase of the register coalesce pass
(roughly lines 264-283).  It won't fix up instructions that reference
some specific offset of the coalesced register if mov[i] is NULL for
that offset, as is the case for the second half of a 16-wide move.  For
example:

| ADD (16) vgrf0:f, vgrf0:f, 1.0:f
| MOV (16) vgrf1:f, vgrf0:f
| MOV (8)  vgrf2:f, vgrf0+1:f { sechalf }

will get incorrectly register-coalesced into:

| ADD (16) vgrf1:f, vgrf1:f, 1.0:f
| MOV (8)  vgrf2:f, vgrf0+1:f { sechalf }


pgp5i_CmC3EKd.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] main: Added entry point for glTransformFeedbackBufferBase

2015-02-09 Thread Martin Peres
v2: Review from Laura Ekstrand
- give more helpful error messages
- factor the lookup code for the xfb and objBuf
- replace some already-existing tabs with spaces
- add comments to explain the cases where xfb == 0 or buffer == 0
- fix the condition for binding the transform buffer or not

v3: Review from Laura Ekstrand
- rename _mesa_lookup_bufferobj_err to
  _mesa_lookup_transform_feedback_bufferobj_err and make it static to avoid a
  future conflict
- make _mesa_lookup_transform_feedback_object_err static

v4: Review from Laura Ekstrand
- add the pdf page number when quoting the spec
- rename some of the symbols to follow the public/private conventions

Signed-off-by: Martin Peres 
---
 src/mapi/glapi/gen/ARB_direct_state_access.xml |   6 +
 src/mesa/main/bufferobj.c  |   9 +-
 src/mesa/main/objectlabel.c|   2 +-
 src/mesa/main/tests/dispatch_sanity.cpp|   1 +
 src/mesa/main/transformfeedback.c  | 146 +++--
 src/mesa/main/transformfeedback.h  |  13 ++-
 src/mesa/vbo/vbo_exec_array.c  |   8 +-
 7 files changed, 139 insertions(+), 46 deletions(-)

diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml 
b/src/mapi/glapi/gen/ARB_direct_state_access.xml
index 15b00c2..35d6906 100644
--- a/src/mapi/glapi/gen/ARB_direct_state_access.xml
+++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml
@@ -14,6 +14,12 @@
   

 
+   
+  
+  
+  
+   
+

 

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 0c1ce98..86532ea 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -3546,8 +3546,9 @@ _mesa_BindBufferRange(GLenum target, GLuint index,
 
switch (target) {
case GL_TRANSFORM_FEEDBACK_BUFFER:
-  _mesa_bind_buffer_range_transform_feedback(ctx, index, bufObj,
-offset, size);
+  _mesa_bind_buffer_range_transform_feedback(ctx,
+ 
ctx->TransformFeedback.CurrentObject,
+ index, bufObj, offset, size);
   return;
case GL_UNIFORM_BUFFER:
   bind_buffer_range_uniform_buffer(ctx, index, bufObj, offset, size);
@@ -3611,7 +3612,9 @@ _mesa_BindBufferBase(GLenum target, GLuint index, GLuint 
buffer)
 
switch (target) {
case GL_TRANSFORM_FEEDBACK_BUFFER:
-  _mesa_bind_buffer_base_transform_feedback(ctx, index, bufObj);
+  _mesa_bind_buffer_base_transform_feedback(ctx,
+
ctx->TransformFeedback.CurrentObject,
+index, bufObj, false);
   return;
case GL_UNIFORM_BUFFER:
   bind_buffer_base_uniform_buffer(ctx, index, bufObj);
diff --git a/src/mesa/main/objectlabel.c b/src/mesa/main/objectlabel.c
index 78df96b..19a7e59 100644
--- a/src/mesa/main/objectlabel.c
+++ b/src/mesa/main/objectlabel.c
@@ -170,7 +170,7 @@ get_label_pointer(struct gl_context *ctx, GLenum 
identifier, GLuint name,
case GL_TRANSFORM_FEEDBACK:
   {
  struct gl_transform_feedback_object *tfo =
-_mesa_lookup_transform_feedback_object(ctx, name);
+lookup_transform_feedback_object(ctx, name);
  if (tfo)
 labelPtr = &tfo->Label;
   }
diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
b/src/mesa/main/tests/dispatch_sanity.cpp
index bf320bf..183755f 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -956,6 +956,7 @@ const struct function gl_core_functions_possible[] = {
 
/* GL_ARB_direct_state_access */
{ "glCreateTransformFeedbacks", 45, -1 },
+   { "glTransformFeedbackBufferBase", 45, -1 },
{ "glCreateTextures", 45, -1 },
{ "glTextureStorage1D", 45, -1 },
{ "glTextureStorage2D", 45, -1 },
diff --git a/src/mesa/main/transformfeedback.c 
b/src/mesa/main/transformfeedback.c
index 10bb6a1..d932943 100644
--- a/src/mesa/main/transformfeedback.c
+++ b/src/mesa/main/transformfeedback.c
@@ -514,22 +514,24 @@ _mesa_EndTransformFeedback(void)
  * Helper used by BindBufferRange() and BindBufferBase().
  */
 static void
-bind_buffer_range(struct gl_context *ctx, GLuint index,
+bind_buffer_range(struct gl_context *ctx,
+  struct gl_transform_feedback_object *obj,
+  GLuint index,
   struct gl_buffer_object *bufObj,
-  GLintptr offset, GLsizeiptr size)
+  GLintptr offset, GLsizeiptr size,
+  bool dsa)
 {
-   struct gl_transform_feedback_object *obj =
-  ctx->TransformFeedback.CurrentObject;
-
/* Note: no need to FLUSH_VERTICES or flag NewTransformFeedback, because
 * transform feedback buffers can't be changed while transform feedback is
 * active.
 */
 
-   /* The general binding point */
-   _mesa_reference_buffer_object(ctx,
- &ctx->Tran

[Mesa-dev] [PATCH 1/6] main: Added entry point for glCreateTransformFeedbacks

2015-02-09 Thread Martin Peres
v2: Review from Laura Ekstrand
- generate the name of the gl method once
- shorten some lines to stay in the 78 chars limit

v3: Review from Fredrik Höglund 
- rename gl_mthd_name to func
- set EverBound in _mesa_create_transform_feedbacks in the dsa case

v4:
- rename _mesa_create_transform_feedbacks to create_transform_feedbacks and
  make it static

Signed-off-by: Martin Peres 
---
 src/mapi/glapi/gen/ARB_direct_state_access.xml |  7 +++
 src/mesa/main/tests/dispatch_sanity.cpp|  1 +
 src/mesa/main/transformfeedback.c  | 67 --
 src/mesa/main/transformfeedback.h  |  3 ++
 4 files changed, 63 insertions(+), 15 deletions(-)

diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml 
b/src/mapi/glapi/gen/ARB_direct_state_access.xml
index 2fe1638..15b00c2 100644
--- a/src/mapi/glapi/gen/ARB_direct_state_access.xml
+++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml
@@ -7,6 +7,13 @@


 
+   
+
+  
+  
+  
+   
+

 

diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
b/src/mesa/main/tests/dispatch_sanity.cpp
index 1f1a3a8..bf320bf 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -955,6 +955,7 @@ const struct function gl_core_functions_possible[] = {
{ "glClipControl", 45, -1 },
 
/* GL_ARB_direct_state_access */
+   { "glCreateTransformFeedbacks", 45, -1 },
{ "glCreateTextures", 45, -1 },
{ "glTextureStorage1D", 45, -1 },
{ "glTextureStorage2D", 45, -1 },
diff --git a/src/mesa/main/transformfeedback.c 
b/src/mesa/main/transformfeedback.c
index a737463..10bb6a1 100644
--- a/src/mesa/main/transformfeedback.c
+++ b/src/mesa/main/transformfeedback.c
@@ -825,25 +825,24 @@ _mesa_lookup_transform_feedback_object(struct gl_context 
*ctx, GLuint name)
  _mesa_HashLookup(ctx->TransformFeedback.Objects, name);
 }
 
-
-/**
- * Create new transform feedback objects.   Transform feedback objects
- * encapsulate the state related to transform feedback to allow quickly
- * switching state (and drawing the results, below).
- * Part of GL_ARB_transform_feedback2.
- */
-void GLAPIENTRY
-_mesa_GenTransformFeedbacks(GLsizei n, GLuint *names)
+static void
+create_transform_feedbacks(struct gl_context *ctx, GLsizei n, GLuint *ids,
+   bool dsa)
 {
GLuint first;
-   GET_CURRENT_CONTEXT(ctx);
+   const char* func;
+
+   if (dsa)
+  func = "glCreateTransformFeedbacks";
+   else
+  func = "glGenTransformFeedbacks";
 
if (n < 0) {
-  _mesa_error(ctx, GL_INVALID_VALUE, "glGenTransformFeedbacks(n < 0)");
+  _mesa_error(ctx, GL_INVALID_VALUE, "%s(n < 0)", func);
   return;
}
 
-   if (!names)
+   if (!ids)
   return;
 
/* we don't need contiguous IDs, but this might be faster */
@@ -854,18 +853,56 @@ _mesa_GenTransformFeedbacks(GLsizei n, GLuint *names)
  struct gl_transform_feedback_object *obj
 = ctx->Driver.NewTransformFeedback(ctx, first + i);
  if (!obj) {
-_mesa_error(ctx, GL_OUT_OF_MEMORY, "glGenTransformFeedbacks");
+_mesa_error(ctx, GL_OUT_OF_MEMORY, "%s", func);
 return;
  }
- names[i] = first + i;
+ ids[i] = first + i;
  _mesa_HashInsert(ctx->TransformFeedback.Objects, first + i, obj);
+ if (dsa) {
+/* this is normally done at bind time in the non-dsa case */
+obj->EverBound = GL_TRUE;
+ }
   }
}
else {
-  _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGenTransformFeedbacks");
+  _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s", func);
}
 }
 
+/**
+ * Create new transform feedback objects.   Transform feedback objects
+ * encapsulate the state related to transform feedback to allow quickly
+ * switching state (and drawing the results, below).
+ * Part of GL_ARB_transform_feedback2.
+ */
+void GLAPIENTRY
+_mesa_GenTransformFeedbacks(GLsizei n, GLuint *names)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   /* GenTransformFeedbacks should just reserve the object names that a
+* subsequent call to BindTransformFeedback should actively create. For
+* the sake of simplicity, we reserve the names and create the objects
+* straight away.
+*/
+
+   create_transform_feedbacks(ctx, n, names, false);
+}
+
+/**
+ * Create new transform feedback objects.   Transform feedback objects
+ * encapsulate the state related to transform feedback to allow quickly
+ * switching state (and drawing the results, below).
+ * Part of GL_ARB_direct_state_access.
+ */
+void GLAPIENTRY
+_mesa_CreateTransformFeedbacks(GLsizei n, GLuint *names)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   create_transform_feedbacks(ctx, n, names, true);
+}
+
 
 /**
  * Is the given ID a transform feedback object?
diff --git a/src/mesa/main/transformfeedback.h 
b/src/mesa/main/transformfeedback.h
index 87f4080..9de1fef 100644
--- a/src/mesa/main/transformfeedback.h
+++ b/src/mesa/main/transformfeedback.h
@@ -102,6

[Mesa-dev] [PATCH 5/6] main: Added entry point for glGetTransformFeedbacki_v

2015-02-09 Thread Martin Peres
v2: Review from Laura Ekstrand
- use the transform feedback object lookup wrapper

v3:
- use the new name of _mesa_lookup_transform_feedback_object_err

Signed-off-by: Martin Peres 
---
 src/mapi/glapi/gen/ARB_direct_state_access.xml |  7 +++
 src/mesa/main/tests/dispatch_sanity.cpp|  1 +
 src/mesa/main/transformfeedback.c  | 29 ++
 src/mesa/main/transformfeedback.h  |  4 
 4 files changed, 41 insertions(+)

diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml 
b/src/mapi/glapi/gen/ARB_direct_state_access.xml
index 1ac4386..935e088 100644
--- a/src/mapi/glapi/gen/ARB_direct_state_access.xml
+++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml
@@ -34,6 +34,13 @@
   

 
+   
+  
+  
+  
+  
+   
+

 

diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
b/src/mesa/main/tests/dispatch_sanity.cpp
index ba36d28..d5535e9 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -959,6 +959,7 @@ const struct function gl_core_functions_possible[] = {
{ "glTransformFeedbackBufferBase", 45, -1 },
{ "glTransformFeedbackBufferRange", 45, -1 },
{ "glGetTransformFeedbackiv", 45, -1 },
+   { "glGetTransformFeedbacki_v", 45, -1 },
{ "glCreateTextures", 45, -1 },
{ "glTextureStorage1D", 45, -1 },
{ "glTextureStorage2D", 45, -1 },
diff --git a/src/mesa/main/transformfeedback.c 
b/src/mesa/main/transformfeedback.c
index 9c9cba2..ce9a6d4 100644
--- a/src/mesa/main/transformfeedback.c
+++ b/src/mesa/main/transformfeedback.c
@@ -1228,3 +1228,32 @@ _mesa_GetTransformFeedbackiv(GLuint xfb, GLenum pname, 
GLint *param)
"glGetTransformFeedbackiv(pname=%i)", pname);
 }
 }
+
+extern void GLAPIENTRY
+_mesa_GetTransformFeedbacki_v(GLuint xfb, GLenum pname, GLuint index,
+  GLint *param)
+{
+   struct gl_transform_feedback_object *obj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   obj = lookup_transform_feedback_object_err(ctx, xfb,
+  "glGetTransformFeedbacki_v");
+   if(!obj) {
+  return;
+   }
+
+   if (index >= ctx->Const.MaxTransformFeedbackBuffers) {
+  _mesa_error(ctx, GL_INVALID_VALUE,
+  "glGetTransformFeedbacki_v(index=%i)", index);
+  return;
+   }
+
+   switch(pname) {
+   case GL_TRANSFORM_FEEDBACK_BUFFER_BINDING:
+  *param = obj->BufferNames[index];
+  break;
+   default:
+  _mesa_error(ctx, GL_INVALID_ENUM,
+  "glGetTransformFeedbacki_v(pname=%i)", pname);
+   }
+}
diff --git a/src/mesa/main/transformfeedback.h 
b/src/mesa/main/transformfeedback.h
index ba00636..9936c68 100644
--- a/src/mesa/main/transformfeedback.h
+++ b/src/mesa/main/transformfeedback.h
@@ -158,4 +158,8 @@ _mesa_TransformFeedbackBufferRange(GLuint xfb, GLuint 
index, GLuint buffer,
 extern void GLAPIENTRY
 _mesa_GetTransformFeedbackiv(GLuint xfb, GLenum pname, GLint *param);
 
+extern void GLAPIENTRY
+_mesa_GetTransformFeedbacki_v(GLuint xfb, GLenum pname, GLuint index,
+  GLint *param);
+
 #endif /* TRANSFORM_FEEDBACK_H */
-- 
2.2.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] main: Added entry point for glGetTransformFeedbacki64_v

2015-02-09 Thread Martin Peres
v2: Review from Laura Ekstrand
- use the transform feedback object lookup wrapper

v3:
- use the new name of _mesa_lookup_transform_feedback_object_err

Signed-off-by: Martin Peres 
---
 src/mapi/glapi/gen/ARB_direct_state_access.xml |  7 ++
 src/mesa/main/tests/dispatch_sanity.cpp|  1 +
 src/mesa/main/transformfeedback.c  | 32 ++
 src/mesa/main/transformfeedback.h  |  4 
 4 files changed, 44 insertions(+)

diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml 
b/src/mapi/glapi/gen/ARB_direct_state_access.xml
index 935e088..340dbba 100644
--- a/src/mapi/glapi/gen/ARB_direct_state_access.xml
+++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml
@@ -41,6 +41,13 @@
   

 
+   
+  
+  
+  
+  
+   
+

 

diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
b/src/mesa/main/tests/dispatch_sanity.cpp
index d5535e9..ad5da83 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -960,6 +960,7 @@ const struct function gl_core_functions_possible[] = {
{ "glTransformFeedbackBufferRange", 45, -1 },
{ "glGetTransformFeedbackiv", 45, -1 },
{ "glGetTransformFeedbacki_v", 45, -1 },
+   { "glGetTransformFeedbacki64_v", 45, -1 },
{ "glCreateTextures", 45, -1 },
{ "glTextureStorage1D", 45, -1 },
{ "glTextureStorage2D", 45, -1 },
diff --git a/src/mesa/main/transformfeedback.c 
b/src/mesa/main/transformfeedback.c
index ce9a6d4..0b6d04b 100644
--- a/src/mesa/main/transformfeedback.c
+++ b/src/mesa/main/transformfeedback.c
@@ -1257,3 +1257,35 @@ _mesa_GetTransformFeedbacki_v(GLuint xfb, GLenum pname, 
GLuint index,
   "glGetTransformFeedbacki_v(pname=%i)", pname);
}
 }
+
+extern void GLAPIENTRY
+_mesa_GetTransformFeedbacki64_v(GLuint xfb, GLenum pname, GLuint index,
+  GLint64 *param)
+{
+   struct gl_transform_feedback_object *obj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   obj = lookup_transform_feedback_object_err(ctx, xfb,
+  "glGetTransformFeedbacki64_v");
+   if(!obj) {
+  return;
+   }
+
+   if (index >= ctx->Const.MaxTransformFeedbackBuffers) {
+  _mesa_error(ctx, GL_INVALID_VALUE,
+  "glGetTransformFeedbacki64_v(index=%i)", index);
+  return;
+   }
+
+   switch(pname) {
+   case GL_TRANSFORM_FEEDBACK_BUFFER_START:
+  *param = obj->Offset[index];
+  break;
+   case GL_TRANSFORM_FEEDBACK_BUFFER_SIZE:
+  *param = obj->RequestedSize[index];
+  break;
+   default:
+  _mesa_error(ctx, GL_INVALID_ENUM,
+  "glGetTransformFeedbacki64_v(pname=%i)", pname);
+   }
+}
diff --git a/src/mesa/main/transformfeedback.h 
b/src/mesa/main/transformfeedback.h
index 9936c68..3d21779 100644
--- a/src/mesa/main/transformfeedback.h
+++ b/src/mesa/main/transformfeedback.h
@@ -162,4 +162,8 @@ extern void GLAPIENTRY
 _mesa_GetTransformFeedbacki_v(GLuint xfb, GLenum pname, GLuint index,
   GLint *param);
 
+extern void GLAPIENTRY
+_mesa_GetTransformFeedbacki64_v(GLuint xfb, GLenum pname, GLuint index,
+  GLint64 *param);
+
 #endif /* TRANSFORM_FEEDBACK_H */
-- 
2.2.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/6] Add 6 transform-feedback-related entry points for direct_state_access v2

2015-02-09 Thread Martin Peres
The updated version of my series. I think I answered everyone's comments
on the previous series. The corresponding piglit tests have been submited
to the piglit ML too. No regression spotted by piglit when applying this
series.

Warning: Those tests will only work when forcing mesa to expose the DSA
extension and when using enable_ppgtt=2. Only gen7 has been tested.

Martin Peres (6):
  main: Added entry point for glCreateTransformFeedbacks
  main: Added entry point for glTransformFeedbackBufferBase
  main: Added entry point for glTransformFeedbackBufferRange
  main: Added entry point for glGetTransformFeedbackiv
  main: Added entry point for glGetTransformFeedbacki_v
  main: Added entry point for glGetTransformFeedbacki64_v

 src/mapi/glapi/gen/ARB_direct_state_access.xml |  41 +++
 src/mesa/main/bufferobj.c  |  10 +-
 src/mesa/main/objectlabel.c|   2 +-
 src/mesa/main/tests/dispatch_sanity.cpp|   6 +
 src/mesa/main/transformfeedback.c  | 375 +
 src/mesa/main/transformfeedback.h  |  33 ++-
 src/mesa/vbo/vbo_exec_array.c  |   8 +-
 7 files changed, 407 insertions(+), 68 deletions(-)

-- 
2.2.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] main: Added entry point for glGetTransformFeedbackiv

2015-02-09 Thread Martin Peres
v2: Review from Laura Ekstrand
- use the transform feedback object lookup wrapper

Signed-off-by: Martin Peres 
---
 src/mapi/glapi/gen/ARB_direct_state_access.xml |  6 ++
 src/mesa/main/tests/dispatch_sanity.cpp|  1 +
 src/mesa/main/transformfeedback.c  | 25 +
 src/mesa/main/transformfeedback.h  |  3 +++
 4 files changed, 35 insertions(+)

diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml 
b/src/mapi/glapi/gen/ARB_direct_state_access.xml
index b3c090f..1ac4386 100644
--- a/src/mapi/glapi/gen/ARB_direct_state_access.xml
+++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml
@@ -28,6 +28,12 @@
   

 
+   
+  
+  
+  
+   
+

 

diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
b/src/mesa/main/tests/dispatch_sanity.cpp
index 87f7d6f..ba36d28 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -958,6 +958,7 @@ const struct function gl_core_functions_possible[] = {
{ "glCreateTransformFeedbacks", 45, -1 },
{ "glTransformFeedbackBufferBase", 45, -1 },
{ "glTransformFeedbackBufferRange", 45, -1 },
+   { "glGetTransformFeedbackiv", 45, -1 },
{ "glCreateTextures", 45, -1 },
{ "glTextureStorage1D", 45, -1 },
{ "glTextureStorage2D", 45, -1 },
diff --git a/src/mesa/main/transformfeedback.c 
b/src/mesa/main/transformfeedback.c
index 2dded21..9c9cba2 100644
--- a/src/mesa/main/transformfeedback.c
+++ b/src/mesa/main/transformfeedback.c
@@ -1203,3 +1203,28 @@ _mesa_ResumeTransformFeedback(void)
assert(ctx->Driver.ResumeTransformFeedback);
ctx->Driver.ResumeTransformFeedback(ctx, obj);
 }
+
+extern void GLAPIENTRY
+_mesa_GetTransformFeedbackiv(GLuint xfb, GLenum pname, GLint *param)
+{
+struct gl_transform_feedback_object *obj;
+GET_CURRENT_CONTEXT(ctx);
+
+obj = lookup_transform_feedback_object_err(ctx, xfb,
+   "glGetTransformFeedbackiv");
+if(!obj) {
+   return;
+}
+
+switch(pname) {
+case GL_TRANSFORM_FEEDBACK_PAUSED:
+   *param = obj->Paused;
+   break;
+case GL_TRANSFORM_FEEDBACK_ACTIVE:
+   *param = obj->Active;
+   break;
+default:
+   _mesa_error(ctx, GL_INVALID_ENUM,
+   "glGetTransformFeedbackiv(pname=%i)", pname);
+}
+}
diff --git a/src/mesa/main/transformfeedback.h 
b/src/mesa/main/transformfeedback.h
index 6cad766..ba00636 100644
--- a/src/mesa/main/transformfeedback.h
+++ b/src/mesa/main/transformfeedback.h
@@ -155,4 +155,7 @@ extern void GLAPIENTRY
 _mesa_TransformFeedbackBufferRange(GLuint xfb, GLuint index, GLuint buffer,
GLintptr offset, GLsizeiptr size);
 
+extern void GLAPIENTRY
+_mesa_GetTransformFeedbackiv(GLuint xfb, GLenum pname, GLint *param);
+
 #endif /* TRANSFORM_FEEDBACK_H */
-- 
2.2.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] main: Added entry point for glTransformFeedbackBufferRange

2015-02-09 Thread Martin Peres
v2: review from Laura Ekstrand
- use the refactored code to lookup the objects
- improve some error messages
- factor out the gl method name computation
- better handle the spec differences between the DSA and non-DSA cases
- quote the spec a little more

v3: review from Laura Ekstrand
- use the new name of _mesa_lookup_bufferobj_err
- swap the comments around the offset and size checks

v4: review from Laura Ekstrand
- add more spec quotes
- properly fix the comments around the offset and size checks

Signed-off-by: Martin Peres 
---
 src/mapi/glapi/gen/ARB_direct_state_access.xml |  8 +++
 src/mesa/main/bufferobj.c  |  3 +-
 src/mesa/main/tests/dispatch_sanity.cpp|  1 +
 src/mesa/main/transformfeedback.c  | 98 +-
 src/mesa/main/transformfeedback.h  |  6 +-
 5 files changed, 97 insertions(+), 19 deletions(-)

diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml 
b/src/mapi/glapi/gen/ARB_direct_state_access.xml
index 35d6906..b3c090f 100644
--- a/src/mapi/glapi/gen/ARB_direct_state_access.xml
+++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml
@@ -20,6 +20,14 @@
   

 
+   
+  
+  
+  
+  
+  
+   
+

 

diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 86532ea..7558e17 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -3548,7 +3548,8 @@ _mesa_BindBufferRange(GLenum target, GLuint index,
case GL_TRANSFORM_FEEDBACK_BUFFER:
   _mesa_bind_buffer_range_transform_feedback(ctx,
  
ctx->TransformFeedback.CurrentObject,
- index, bufObj, offset, size);
+ index, bufObj, offset, size,
+ false);
   return;
case GL_UNIFORM_BUFFER:
   bind_buffer_range_uniform_buffer(ctx, index, bufObj, offset, size);
diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
b/src/mesa/main/tests/dispatch_sanity.cpp
index 183755f..87f7d6f 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -957,6 +957,7 @@ const struct function gl_core_functions_possible[] = {
/* GL_ARB_direct_state_access */
{ "glCreateTransformFeedbacks", 45, -1 },
{ "glTransformFeedbackBufferBase", 45, -1 },
+   { "glTransformFeedbackBufferRange", 45, -1 },
{ "glCreateTextures", 45, -1 },
{ "glTextureStorage1D", 45, -1 },
{ "glTextureStorage2D", 45, -1 },
diff --git a/src/mesa/main/transformfeedback.c 
b/src/mesa/main/transformfeedback.c
index d932943..2dded21 100644
--- a/src/mesa/main/transformfeedback.c
+++ b/src/mesa/main/transformfeedback.c
@@ -541,7 +541,8 @@ bind_buffer_range(struct gl_context *ctx,
 /**
  * Specify a buffer object to receive transform feedback results.  Plus,
  * specify the starting offset to place the results, and max size.
- * Called from the glBindBufferRange() function.
+ * Called from the glBindBufferRange() and glTransformFeedbackBufferRange
+ * functions.
  */
 void
 _mesa_bind_buffer_range_transform_feedback(struct gl_context *ctx,
@@ -549,35 +550,74 @@ _mesa_bind_buffer_range_transform_feedback(struct 
gl_context *ctx,
GLuint index,
struct gl_buffer_object *bufObj,
GLintptr offset,
-   GLsizeiptr size)
+   GLsizeiptr size,
+   bool dsa)
 {
+   const char *gl_methd_name;
+   if (dsa)
+  gl_methd_name = "glTransformFeedbackBufferRange";
+   else
+  gl_methd_name = "glBindBufferRange";
+
+
if (obj->Active) {
-  _mesa_error(ctx, GL_INVALID_OPERATION,
-  "glBindBufferRange(transform feedback active)");
+  _mesa_error(ctx, GL_INVALID_OPERATION, "%s(transform feedback active)",
+  gl_methd_name);
   return;
}
 
if (index >= ctx->Const.MaxTransformFeedbackBuffers) {
-  _mesa_error(ctx, GL_INVALID_VALUE, "glBindBufferRange(index=%d "
-  "out of bounds)", index);
+  /* OpenGL 4.5 core profile, 6.1, pdf page 82: An INVALID_VALUE error is
+   * generated if index is greater than or equal to the number of binding
+   * points for transform feedback, as described in section 6.7.1.
+   */
+  _mesa_error(ctx, GL_INVALID_VALUE, "%s(index=%d out of bounds)",
+  gl_methd_name, index);
   return;
}
 
if (size & 0x3) {
-  /* must a multiple of four */
-  _mesa_error(ctx, GL_INVALID_VALUE, "glBindBufferRange(size=%d)",
-  (int) size);
+  /* OpenGL 4.5 core profile, 6.7, pdf page 103: multiple of 4 */
+  _mesa_error(ctx, GL_INVALID_VALUE, "%s(size=%d must be a multiple of "
+  "four)", gl_me

Re: [Mesa-dev] [PATCH 18/32] i965/vec4: Don't attempt to reduce swizzles of send from GRF instructions.

2015-02-09 Thread Francisco Jerez
Matt Turner  writes:

> On Fri, Feb 6, 2015 at 6:42 AM, Francisco Jerez  wrote:
>> ---
>>  src/mesa/drivers/dri/i965/brw_vec4.cpp | 3 ++-
>>  1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
>> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> index c3f68e6..aaa4873 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> @@ -437,7 +437,8 @@ vec4_visitor::opt_reduce_swizzle()
>> bool progress = false;
>>
>> foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
>> -  if (inst->dst.file == BAD_FILE || inst->dst.file == HW_REG)
>> +  if (inst->dst.file == BAD_FILE || inst->dst.file == HW_REG ||
>> +  inst->is_send_from_grf())
>>   continue;
>>
>>int swizzle[4];
>> --
>> 2.1.3
>
> Like the last patch, I totally believe this is a problem, I just want
> a little more explanation for how it was uncovered.

The problem is that it's not safe for opt_reduce_swizzle() to assume
that there is a one-to-one correspondence between vec4 channels of its
sources and its destination for send instructions.  For example, the
message may include a header which has an effect on all channels.  For
example:

| mov vgrf31.0:UD, 0D
| mov vgrf31.0.w:UD, 17D
| typed_surface_read vgrf33.0.x:UD, vgrf31.xyzw:UD, vgrf32.:UD, 1U

will get incorrectly reduced to:

| mov vgrf31.0:UD, 0D
| mov vgrf31.0.w:UD, 17D
| typed_surface_read vgrf33.0.x:UD, vgrf31.:UD, vgrf32.:UD, 1U

now subsequent optimization passes will assume that only the x component
of vgrf31 is used and will simplify the above code to:

| mov vgrf31.0.x:UD, 0D
| typed_surface_read vgrf33.0.x:UD, vgrf31.:UD, vgrf32.:UD, 1U


pgpbayODW9xsk.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 19/32] i965/vec4: Pass dst register to the vec4_instruction constructor.

2015-02-09 Thread Francisco Jerez
Matt Turner  writes:

> On Fri, Feb 6, 2015 at 6:42 AM, Francisco Jerez  wrote:
>> So regs_written gets initialized with a sensible value.
>> ---
>>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 +--
>>  1 file changed, 5 insertions(+), 6 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
>> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> index babddee..514de11 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> @@ -2384,12 +2384,11 @@ vec4_visitor::visit(ir_call *ir)
>>  src_reg
>>  vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg 
>> sampler)
>>  {
>> -   vec4_instruction *inst = new(mem_ctx) 
>> vec4_instruction(SHADER_OPCODE_TXF_MCS);
>> +   vec4_instruction *inst =
>> +  new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS,
>> +dst_reg(this, glsl_type::uvec4_type));
>> inst->base_mrf = 2;
>> inst->mlen = 1;
>> -   inst->dst = dst_reg(this, glsl_type::uvec4_type);
>> -   inst->dst.writemask = WRITEMASK_XYZW;
>> -
>> inst->src[1] = sampler;
>>
>> /* parameters are: u, v, r, lod; lod will always be zero due to api 
>> restrictions */
>> @@ -2562,7 +2561,8 @@ vec4_visitor::visit(ir_texture *ir)
>>unreachable("Unrecognized tex op");
>> }
>>
>> -   vec4_instruction *inst = new(mem_ctx) vec4_instruction(opcode);
>> +   vec4_instruction *inst = new(mem_ctx) vec4_instruction(
>> +  opcode, dst_reg(this, ir->type));
>>
>> if (ir->offset != NULL && !has_nonconstant_offset) {
>>inst->offset =
>> @@ -2587,7 +2587,6 @@ vec4_visitor::visit(ir_texture *ir)
>>is_high_sampler(brw, sampler_reg);
>> inst->base_mrf = 2;
>> inst->mlen = inst->header_present + 1; /* always at least one */
>> -   inst->dst = dst_reg(this, ir->type);
>> inst->dst.writemask = WRITEMASK_XYZW;
>
> I think you probably want to delete this line too?
>
> If so,
>
> Reviewed-by: Matt Turner 

Thanks, fixed it locally.


pgpJz0aNXbBm_.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 21/32] i965/vec4: Fix the scheduler to take into account reads and writes of multiple registers.

2015-02-09 Thread Francisco Jerez
Matt Turner  writes:

> On Fri, Feb 6, 2015 at 6:43 AM, Francisco Jerez  wrote:
>> ---
>>  src/mesa/drivers/dri/i965/brw_ir_vec4.h |  1 +
>>  src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 15 ++-
>>  src/mesa/drivers/dri/i965/brw_vec4.cpp  | 17 
>> +
>>  3 files changed, 28 insertions(+), 5 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h 
>> b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
>> index f11a2d2..941086f 100644
>> --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
>> +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
>> @@ -171,6 +171,7 @@ public:
>> unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header 
>> */
>>
>> bool is_send_from_grf();
>> +   unsigned regs_read(unsigned arg) const;
>> bool can_reswizzle(int dst_writemask, int swizzle, int swizzle_mask);
>> void reswizzle(int dst_writemask, int swizzle);
>> bool can_do_source_mods(struct brw_context *brw);
>> diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp 
>> b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
>> index 88feeca..2b22b2c 100644
>> --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
>> @@ -1063,7 +1063,8 @@ vec4_instruction_scheduler::calculate_deps()
>>/* read-after-write deps. */
>>for (int i = 0; i < 3; i++) {
>>   if (inst->src[i].file == GRF) {
>> -add_dep(last_grf_write[inst->src[i].reg], n);
>> +for (unsigned j = 0; j < inst->regs_read(i); ++j)
>> +   add_dep(last_grf_write[inst->src[i].reg + j], n);
>>   } else if (inst->src[i].file == HW_REG &&
>>  (inst->src[i].fixed_hw_reg.file ==
>>   BRW_GENERAL_REGISTER_FILE)) {
>> @@ -1103,8 +1104,10 @@ vec4_instruction_scheduler::calculate_deps()
>>
>>/* write-after-write deps. */
>>if (inst->dst.file == GRF) {
>> - add_dep(last_grf_write[inst->dst.reg], n);
>> - last_grf_write[inst->dst.reg] = n;
>> + for (unsigned j = 0; j < inst->regs_written; ++j) {
>> +add_dep(last_grf_write[inst->dst.reg + j], n);
>> +last_grf_write[inst->dst.reg + j] = n;
>> + }
>>} else if (inst->dst.file == MRF) {
>>   add_dep(last_mrf_write[inst->dst.reg], n);
>>   last_mrf_write[inst->dst.reg] = n;
>> @@ -1156,7 +1159,8 @@ vec4_instruction_scheduler::calculate_deps()
>>/* write-after-read deps. */
>>for (int i = 0; i < 3; i++) {
>>   if (inst->src[i].file == GRF) {
>> -add_dep(n, last_grf_write[inst->src[i].reg]);
>> +for (unsigned j = 0; j < inst->regs_read(i); ++j)
>> +   add_dep(n, last_grf_write[inst->src[i].reg + j]);
>>   } else if (inst->src[i].file == HW_REG &&
>>  (inst->src[i].fixed_hw_reg.file ==
>>   BRW_GENERAL_REGISTER_FILE)) {
>> @@ -1194,7 +1198,8 @@ vec4_instruction_scheduler::calculate_deps()
>> * can mark this as WAR dependency.
>> */
>>if (inst->dst.file == GRF) {
>> - last_grf_write[inst->dst.reg] = n;
>> + for (unsigned j = 0; j < inst->regs_written; ++j)
>> +last_grf_write[inst->dst.reg + j] = n;
>>} else if (inst->dst.file == MRF) {
>>   last_mrf_write[inst->dst.reg] = n;
>>} else if (inst->dst.file == HW_REG &&
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
>> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> index a4fd136..b2f3151 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> @@ -262,6 +262,23 @@ vec4_instruction::is_send_from_grf()
>> }
>>  }
>>
>> +unsigned
>> +vec4_instruction::regs_read(unsigned arg) const
>> +{
>> +   switch (opcode) {
>> +   case SHADER_OPCODE_SHADER_TIME_ADD:
>> +  return (arg == 0 ? mlen :
>> +  src[arg].file != BAD_FILE ? 1 : 0);
>> +
>> +   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
>> +  return (arg == 1 ? mlen :
>> +  src[arg].file != BAD_FILE ? 1 : 0);
>> +
>> +   default:
>> +  return (src[arg].file != BAD_FILE ? 1 : 0);
>> +   }
>> +}
>
> Better to just do
>
>if (src[arg].file == BAD_FILE)
>   return 0;
>
> before the switch statement to avoid the nested ternary, or do you
> actually need to test arg == 0/1 before checking .file in those two
> cases?

Yeah, I've fixed that locally, thanks.


pgpWeqOJzKjir.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/14] i965: Don't tile 1D miptrees.

2015-02-09 Thread Neil Roberts
I made a similar patch in my local tree because it will be necessary for
Skylake which doesn't support tiling for 1D textures. I made a little
test to time rendering a large (4096) 1D texture here:

https://github.com/bpeel/glthing/tree/time-1d-texture

It gives about an 11% increase in FPS with this patch.

It seems like a good idea to me to make it check the height rather than
the texture dimensions as Kenneth says.

Regards,
- Neil

Kenneth Graunke  writes:

> On Saturday, February 07, 2015 02:46:31 AM Francisco Jerez wrote:
>> Kenneth Graunke  writes:
>> 
>> > On Friday, February 06, 2015 07:23:15 PM Francisco Jerez wrote:
>> >> It doesn't really improve locality of texture fetches, quite the
>> >> opposite it's a waste of memory bandwidth and space due to tile
>> >> alignment.
>> >> ---
>> >>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 4 
>> >>  1 file changed, 4 insertions(+)
>> >> 
>> >> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
>> >> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> >> index 64752dd..311b204 100644
>> >> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> >> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> >> @@ -488,6 +488,10 @@ intel_miptree_choose_tiling(struct brw_context *brw,
>> >> base_format == GL_DEPTH_STENCIL_EXT)
>> >>return I915_TILING_Y;
>> >>  
>> >> +   if (mt->target == GL_TEXTURE_1D ||
>> >> +   mt->target == GL_TEXTURE_1D_ARRAY)
>> >> +  return I915_TILING_NONE;
>> >> +
>> >> int minimum_pitch = mt->total_width * mt->cpp;
>> >>  
>> >> /* If the width is much smaller than a tile, don't bother tiling. */
>> >> 
>> >
>> > What do you think about checking mt->logical_height0 == 1 instead?
>> > GLES doesn't have 1D textures, but people might use a 2D texture with
>> > height 1, and I think the same logic applies.
>> >
>> > I've also been thinking of trying to make core Mesa hide 1D textures and
>> > simply turn them into 2D textures with height = 1, so drivers don't have
>> > to deal with them.
>> >
>> > Either way, this seems like a good idea to me.
>> 
>> I guess that would work because the layer count of 1D array textures
>> ends up in logical_depth0 rather than logical_height0?  Sounds good to
>> me, but it would also have the side effect of not tiling "degenerate" 3D
>> textures with height=1 but depth>1, I guess we would still like to tile
>> those?  How about (mt->logical_height0 == 1 && (mt->logical_depth0 == 1 ||
>> mt->target != GL_TEXTURE_3D))?
>> 
>> Thanks Ken!
>
> I suppose that's fine, though frankly, trying to optimize a texture with
> height 1 at all seems pretty unnecessary to me.  I've never seen a 3D
> texture with height 1 in the wild...
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


pgpPZwNTC8EO1.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCHv2 32/32] i965: Don't compact instructions with unmapped bits.

2015-02-09 Thread Francisco Jerez
Some instruction bits don't have a mapping defined to any compacted
instruction field.  If they're ever set and we end up compacting the
instruction they will be forced to zero.  Avoid using compaction in such
cases.

v2: Align multiple lines of an expression to the same column.  Change
conditional compaction of 3-source instructions to an
assertion. (Matt)
---
 src/mesa/drivers/dri/i965/brw_eu_compact.c | 46 ++
 1 file changed, 46 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index 8e33bcb..f80bcc1 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -843,10 +843,53 @@ set_3src_source_index(struct brw_context *brw, 
brw_compact_inst *dst, brw_inst *
 }
 
 static bool
+has_unmapped_bits(struct brw_context *brw, brw_inst *src)
+{
+   /* Check for instruction bits that don't map to any of the fields of the
+* compacted instruction.  The instruction cannot be compacted if any of
+* them are set.  They overlap with:
+*  - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
+*  - Dst.AddrImm[9] (bit 47 on Gen8)
+*  - Src0.AddrImm[9] (bit 95 on Gen8)
+*/
+   if (brw->gen >= 8)
+  return brw_inst_bits(src, 95, 95) ||
+ brw_inst_bits(src, 47, 47) ||
+ brw_inst_bits(src, 11, 11) ||
+ brw_inst_bits(src, 7,  7);
+   else
+  return brw_inst_bits(src, 95, 91) ||
+ brw_inst_bits(src, 47, 47) ||
+ brw_inst_bits(src, 7,  7) ||
+ (brw->gen < 7 && brw_inst_bits(src, 90, 90));
+}
+
+static bool
+has_3src_unmapped_bits(struct brw_context *brw, brw_inst *src)
+{
+   /* Check for three-source instruction bits that don't map to any of the
+* fields of the compacted instruction.  All of them seem to be reserved
+* bits currently.
+*/
+   assert(brw->gen >= 8);
+   if (brw->gen >= 9 || brw->is_cherryview)
+  return brw_inst_bits(src, 127, 127) ||
+ brw_inst_bits(src, 105, 105) ||
+ brw_inst_bits(src, 7,  7);
+   else
+  return brw_inst_bits(src, 127, 126) ||
+ brw_inst_bits(src, 105, 105) ||
+ brw_inst_bits(src, 84, 84) ||
+ brw_inst_bits(src, 36, 35) ||
+ brw_inst_bits(src, 7,  7);
+}
+
+static bool
 brw_try_compact_3src_instruction(struct brw_context *brw, brw_compact_inst 
*dst,
  brw_inst *src)
 {
assert(brw->gen >= 8);
+   assert(!has_3src_unmapped_bits(brw, src));
 
 #define compact(field) \
brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(brw, src))
@@ -937,6 +980,9 @@ brw_try_compact_instruction(struct brw_context *brw, 
brw_compact_inst *dst,
   return false;
}
 
+   if (has_unmapped_bits(brw, src))
+  return false;
+
memset(&temp, 0, sizeof(temp));
 
brw_compact_inst_set_opcode(&temp, brw_inst_opcode(brw, src));
-- 
2.1.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCHv2 01/32] i965: Factor out virtual GRF allocation to a separate object.

2015-02-09 Thread Francisco Jerez
Right now virtual GRF book-keeping and allocation is performed in each
visitor class separately (among other hundred different things),
leading to duplicated logic in each visitor and preventing layering as
it forces any code that manipulates i965 IR and needs to allocate
virtual registers to depend on the specific visitor that happens to be
used to translate from GLSL IR.

v2: Use realloc()/free() to allocate VGRF book-keeping arrays (Connor).

Reviewed-by: Matt Turner 
---
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 77 ---
 src/mesa/drivers/dri/i965/brw_fs.h |  8 +-
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp   |  2 +-
 .../drivers/dri/i965/brw_fs_live_variables.cpp |  8 +-
 src/mesa/drivers/dri/i965/brw_fs_live_variables.h  |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp   |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  | 50 ++---
 .../drivers/dri/i965/brw_fs_register_coalesce.cpp  |  8 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 49 ++--
 src/mesa/drivers/dri/i965/brw_ir_allocator.h   | 87 ++
 .../drivers/dri/i965/brw_schedule_instructions.cpp | 10 +--
 src/mesa/drivers/dri/i965/brw_shader.h |  6 ++
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 20 ++---
 src/mesa/drivers/dri/i965/brw_vec4.h   | 12 ---
 .../drivers/dri/i965/brw_vec4_copy_propagation.cpp |  8 +-
 src/mesa/drivers/dri/i965/brw_vec4_cse.cpp |  2 +-
 .../drivers/dri/i965/brw_vec4_live_variables.cpp   | 10 +--
 .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 43 ++-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 36 ++---
 19 files changed, 237 insertions(+), 203 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_ir_allocator.h

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 83c09e1..3acbb0b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -424,7 +424,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst,
 
assert(dst.width % 8 == 0);
int regs_written = 4 * (dst.width / 8) * scale;
-   fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(regs_written),
+   fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written),
dst.type, dst.width);
inst = new(mem_ctx) fs_inst(op, vec4_result, surf_index, vec4_offset);
inst->regs_written = regs_written;
@@ -688,7 +688,7 @@ fs_visitor::get_timestamp()
   0),
  BRW_REGISTER_TYPE_UD));
 
-   fs_reg dst = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD, 4);
+   fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 4);
 
fs_inst *mov = emit(MOV(dst, ts));
/* We want to read the 3 fields we care about even if it's not enabled in
@@ -764,7 +764,7 @@ fs_visitor::emit_shader_time_end()
 
fs_reg start = shader_start_time;
start.negate = true;
-   fs_reg diff = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD, 1);
+   fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1);
emit(ADD(diff, start, shader_end_time));
 
/* If there were no instructions between the two timestamp gets, the diff
@@ -1029,26 +1029,11 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
}
 }
 
-int
-fs_visitor::virtual_grf_alloc(int size)
-{
-   if (virtual_grf_array_size <= virtual_grf_count) {
-  if (virtual_grf_array_size == 0)
-virtual_grf_array_size = 16;
-  else
-virtual_grf_array_size *= 2;
-  virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
-  virtual_grf_array_size);
-   }
-   virtual_grf_sizes[virtual_grf_count] = size;
-   return virtual_grf_count++;
-}
-
 fs_reg
 fs_visitor::vgrf(const glsl_type *const type)
 {
int reg_width = dispatch_width / 8;
-   return fs_reg(GRF, virtual_grf_alloc(type_size(type) * reg_width),
+   return fs_reg(GRF, alloc.allocate(type_size(type) * reg_width),
  brw_type_for_base_type(type), dispatch_width);
 }
 
@@ -1056,7 +1041,7 @@ fs_reg
 fs_visitor::vgrf(int num_components)
 {
int reg_width = dispatch_width / 8;
-   return fs_reg(GRF, virtual_grf_alloc(num_components * reg_width),
+   return fs_reg(GRF, alloc.allocate(num_components * reg_width),
  BRW_REGISTER_TYPE_F, dispatch_width);
 }
 
@@ -1912,14 +1897,14 @@ fs_visitor::assign_vs_urb_setup()
 void
 fs_visitor::split_virtual_grfs()
 {
-   int num_vars = this->virtual_grf_count;
+   int num_vars = this->alloc.count;
 
/* Count the total number of registers */
int reg_count = 0;
int vgrf_to_reg[num_vars];
for (int i = 0; i < num_vars; i++) {
   vgrf_to_reg[i] = reg_count;
-  reg_count += virtual_grf_sizes[i];
+  reg_count += alloc.sizes[i];
}
 
/* An array of "split points".  For each register slot, this indicates
@@ -1935,14 +

[Mesa-dev] [Bug 77449] Tracker bug for all bugs related to Steam titles

2015-02-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=77449
Bug 77449 depends on bug 84570, which changed state.

Bug 84570 Summary: Borderlands 2/Pre-Sequel: Constant frame rate drops while 
playing; really bad with additionl lighting
https://bugs.freedesktop.org/show_bug.cgi?id=84570

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 84570] Borderlands 2/Pre-Sequel: Constant frame rate drops while playing; really bad with additionl lighting

2015-02-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=84570

Kai  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #34 from Kai  ---
Then I'm closing this bug. Thanks to everybody for your various contributions!

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/10] i965/fs: Combine constants and unconditionally emit MADs

2015-02-09 Thread Eero Tamminen

Hi,

On 02/05/2015 06:21 AM, Matt Turner wrote:

total instructions in shared programs: 5895414 -> 5747578 (-2.51%)
instructions in affected programs: 3618111 -> 3470275 (-4.09%)
helped:20492
HURT:  4449
GAINED:54
LOST:  146

and with NIR, that already emits MADs always:

total instructions in shared programs: 7992936 -> 7772474 (-2.76%)
instructions in affected programs: 3738730 -> 3518268 (-5.90%)
helped:22082
HURT:  3445
GAINED:70
LOST:  78

There are some particularly exciting individual improvements too:

kerbal-space-program/667: FS SIMD8: 1024 -> 561 (-45.21%)
kerbal-space-program/676: FS SIMD8: 1021 -> 558 (-45.35%)


From how old Kerbal version these shaders are from?

I just noticed that recent Kerbal outputs large amount of register spill 
& alloc failure messages with INTEL_DEBUG=perf, whereas output I've 
saved from a last year Kerbal version, had much less of them.




rocketbirds-hardboiled-chicken/fp-2: FS SIMD8: 1076 -> 582 (-45.91%)

Changing the fs_visitor and fp backend to always emit MADs is a little
like moving the goal posts for switching to NIR, but I think always
emitting MADs this is the right direction, and of course we'll discover
more improvements the more we work with the code.



- Eero

PS. output from games using Unity3D engine goes to:
 ~/.config/unity3d///Player.log

Using Apitrace replay makes seeing the warnings easier. :-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Expected wide line rendering with clipping

2015-02-09 Thread Erik Faye-Lund
On Mon, Feb 9, 2015 at 12:35 PM, Roland Scheidegger  wrote:
> Am 09.02.2015 um 09:53 schrieb Iago Toral:
>> On Fri, 2015-02-06 at 21:27 +0100, Roland Scheidegger wrote:
>>> Am 06.02.2015 um 13:11 schrieb Iago Toral:
 Hi,

 Eduardo and I have been looking into a few dEQP test failures that deal
 with wide line rendering. There are a few of them that fail because of
 how clipping is implemented for this case.

 The problem in these cases seems to be that the hw renders the wide line
 as a parallelogram so that if an edge of the parallelogram is partially
 clipped, the other parallel edge will also be clipped at the same
 height/width so that the resulting wide line stays a parallelogram. The
 dEQP renders as if a wide line where a collection of 1px lines, so
 cliping one edge of the resulting line does not have implications for
 the other edge.

 This ASCII art illustrates the problem (* represent line pixels, |
 represents the viewport's rightmost edge):

  Expected by dEQP  i965 rendering
  |  |
 *|  |
**|  |
   ***|  |
  |  |
  |  |
  |  |

 We can make the rendering result match dEQP's expectation by enabling
 the GuardBand in the clip stage (GEN6_CLIP_GB_TEST). This is being
 disabled by the driver for gen7 in this scenario because the underlying
 framebuffer surface is larger than the viewport, and per the comment in
 the code, in gen7 that would make it render beyond the viewport limits
 in that surface, while it notes that gen8 hw fixes this. So I guess that
 we do not want to do this in any case in gen7.

 Then, I have been reviewing the OpenGL specs to see if they clarify what
 should happen when clipping wide lines and I think the spec does not
 make a clear point, since when it talks about line clipping it does not
 cover the case of wide lines specifically:

 "13.5. PRIMITIVE CLIPPING
 ...
 If part of the line segment lies in the volume and part lies outside,
 then the line segment is clipped and new vertex coordinates are computed
 for one or both vertices. A clipped line segment endpoint lies on both
 the original line segment and the boundary of the clip volume.
 ...
 "

 The above description is clear for 1px lines, but where should the new
 vertex be generated exactly for wide lines, where many points could be
 at the boundary of the clip volume? Is any point valid? (that would mean
 the dEQP test is bogus because there are multiple accepted renderings),
 should the new vertex be exactly at the center of the line? (that would
 make both deqp and intel rendering bogus).

 Then there is also this comment in "14.5.2.2 Wide Lines" inside the
 non-AA line rasterization chapter:

 "Non-antialiased line segments of width other than one are rasterized by
 off-setting them in the minor direction (for an x-major line, the minor
 direction is y, and for a y-major line, the minor direction is x) and
 replicating fragments in the minor direction (see figure 14.3). Let w be
 the width rounded to the nearest integer (if w = 0, then it is as if w =
 1). If the line segment has endpoints given by (x 0 , y 0 ) and (x 1 , y
 1 ) in window coordinates, the segment with endpoints (x 0 , y 0 − (w −
 1)/2) and (x 1 , y 1 − (w − 1)/2) is rasterized, but instead of a single
 fragment, a column of fragments of height w (a row of fragments of
 length w for a y-major segment) is produced at each x (y for y-major)
 location. The lowest fragment of this column is the fragment that would
 be produced by rasterizing the segment of width 1 with the modified
 coordinates."

 This "suggests" that wide line rendering should be equivalent to
 rendering multiple lines of width=1 and if we actually did that then the
 expected rendering result after clipping would be what dEQP expects.

 I think the whole thing isn't 100% clear. Does anyone have any thoughts
 on what should be considered correct behavior in this case?

 Iago


>>>
>>> I am quite sure that i965 rendering is what (at least legacy GL)
>>> expects. This is because the wide-line expansion is defined to be
>>> happening at rasterization time, i.e. past clipping. Thus, at clipping
>>> time, no matter if a line is wide or not, it is still really a line,
>>> hence clipping will produce a single new endpoint at the viewport edge
>>> and in rasterization this is then what will be used as the endpoint.
>>
>> That makes sense, but in that case I think i965 rendering would still be
>> incorrect, the expected result would look something like this:
>>
>>   

[Mesa-dev] [PATCH] mesa: Rename the CEILING() macro to DIV_ROUND_UP().

2015-02-09 Thread Francisco Jerez
Some people have complained that code using the CEILING() macro is
difficult to understand because it's not immediately obvious what it
is supposed to do until you go and look up its definition.  Use a more
descriptive name that matches the similar utility macro in the Linux
kernel.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +-
 src/mesa/main/image.c  | 2 +-
 src/mesa/main/macros.h | 2 +-
 src/mesa/main/pack.c   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index fe05110..9d5e648 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -3628,7 +3628,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
 */
this->uniform_array_size = 1;
if (prog_data) {
-  this->uniform_array_size = MAX2(CEILING(stage_prog_data->nr_params, 4),
+  this->uniform_array_size = MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 
4),
   1);
}
 
diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c
index c37900c..e97b006 100644
--- a/src/mesa/main/image.c
+++ b/src/mesa/main/image.c
@@ -151,7 +151,7 @@ _mesa_image_offset( GLuint dimensions,
   assert(format == GL_COLOR_INDEX || format == GL_STENCIL_INDEX);
 
   bytes_per_row = alignment
-* CEILING( comp_per_pixel*pixels_per_row, 8*alignment );
+* DIV_ROUND_UP( comp_per_pixel*pixels_per_row, 8*alignment 
);
 
   bytes_per_image = bytes_per_row * rows_per_image;
 
diff --git a/src/mesa/main/macros.h b/src/mesa/main/macros.h
index cd5f2d6..2d59c6f 100644
--- a/src/mesa/main/macros.h
+++ b/src/mesa/main/macros.h
@@ -808,7 +808,7 @@ DIFFERENT_SIGNS(GLfloat x, GLfloat y)
 
 
 /** Compute ceiling of integer quotient of A divided by B. */
-#define CEILING( A, B )  ( (A) % (B) == 0 ? (A)/(B) : (A)/(B)+1 )
+#define DIV_ROUND_UP( A, B )  ( (A) % (B) == 0 ? (A)/(B) : (A)/(B)+1 )
 
 
 /** casts to silence warnings with some compilers */
diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c
index 4cc8468..2111a76 100644
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -155,7 +155,7 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte 
*source,
if (!source)
   return;
 
-   width_in_bytes = CEILING( width, 8 );
+   width_in_bytes = DIV_ROUND_UP( width, 8 );
src = source;
for (row = 0; row < height; row++) {
   GLubyte *dst = (GLubyte *) _mesa_image_address2d(packing, dest,
-- 
2.1.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/fs: Fix fs_inst::regs_written calculation for instructions with scalar dst.

2015-02-09 Thread Francisco Jerez
Scalar registers are required to have zero stride, fix the
regs_written calculation not to assume that the instruction writes
zero registers in that case.

v2: Rename CEILING() to DIV_ROUND_UP(). (Matt, Ken)

Reviewed-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 0a82fb7..b75d377 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -126,7 +126,8 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const 
fs_reg &dst,
case HW_REG:
case MRF:
case ATTR:
-  this->regs_written = (dst.width * dst.stride * type_sz(dst.type) + 31) / 
32;
+  this->regs_written =
+ DIV_ROUND_UP(MAX2(dst.width * dst.stride, 1) * type_sz(dst.type), 32);
   break;
case BAD_FILE:
   this->regs_written = 0;
-- 
2.1.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] util/u_atomic: Add new macro p_atomic_add

2015-02-09 Thread Jose Fonseca

On 06/02/15 22:39, Carl Worth wrote:

On Fri, Feb 06 2015, Aaron Watry wrote:

Ignore me if this is a stupid question, but should those both be
sizeof(short)?  I'd expect the first to be sizeof(char).


Not a stupid question. That was a copy-and-paste (kill-and-yank ?) bug
of mine.

Thanks for your attention to detail. I've fixed this in my tree.

-Carl


Hi Carl,

Just one more tweak to InterlockedExchangeAdd64 as per patch attached. 
(The 64bit intrisicis are only available on 64-bits, but the 
non-intrisinc version is available everywhee.)


With that u_test_atomic builds and passes for me both on 32 and 64bits.

Sorry for the delay. And thanks for your help in keeping MSVC support on 
par.


Jose

diff --git a/src/util/u_atomic.h b/src/util/u_atomic.h
index 4eb0ec6..0c43410 100644
--- a/src/util/u_atomic.h
+++ b/src/util/u_atomic.h
@@ -147,10 +147,10 @@ char _InterlockedCompareExchange8(char volatile *Destination8, char Exchange8, c
  (assert(!"should not get here"), 0))
 
 #define p_atomic_add(_v, _i) (\
-   sizeof *(_v) == sizeof(short)   ? _InterlockedExchangeAdd8 ((char *)   (_v), (_i)) : \
+   sizeof *(_v) == sizeof(char)? _InterlockedExchangeAdd8 ((char *)   (_v), (_i)) : \
sizeof *(_v) == sizeof(short)   ? _InterlockedExchangeAdd16((short *)  (_v), (_i)) : \
sizeof *(_v) == sizeof(long)? _InterlockedExchangeAdd  ((long *)   (_v), (_i)) : \
-   sizeof *(_v) == sizeof(__int64) ? _InterlockedExchangeAdd64((__int64 *)(_v), (_i)) : \
+   sizeof *(_v) == sizeof(__int64) ? InterlockedExchangeAdd64 ((__int64 *)(_v), (_i)) : \
  (assert(!"should not get here"), 0))
 
 #define p_atomic_cmpxchg(_v, _old, _new) (\
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 89043] undefined symbol: _glapi_tls_Dispatch

2015-02-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=89043

Bug ID: 89043
   Summary: undefined symbol: _glapi_tls_Dispatch
   Product: Mesa
   Version: git
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: adityaavina...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

Hi,
I am trying to build mesa from git. It was working fine for a week. Yesterday
this error keeps poping up when I try to compile some tests.

I re-installed mesa. It didn't work.
I re-installed Ubuntu 14.04. It didn't work.

Nothing is getting compiled, the binaries glxinfo, glxgears are also throwing
the same error.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: Bump MAX_IMAGE_UNIFORMS to 32.

2015-02-09 Thread Francisco Jerez
So the i965 driver can expose 32 image uniforms per shader stage.
---
 src/mesa/main/config.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index 4ec4b75..08e1a14 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -178,7 +178,7 @@
 #define MAX_COMBINED_ATOMIC_BUFFERS(MAX_UNIFORM_BUFFERS * 6)
 /* Size of an atomic counter in bytes according to ARB_shader_atomic_counters 
*/
 #define ATOMIC_COUNTER_SIZE4
-#define MAX_IMAGE_UNIFORMS 16
+#define MAX_IMAGE_UNIFORMS 32
 /* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */
 #define MAX_IMAGE_UNITS(MAX_IMAGE_UNIFORMS * 6)
 /*@}*/
-- 
2.1.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/14] i965: Allocate binding table space for shader images.

2015-02-09 Thread Francisco Jerez
Kenneth Graunke  writes:

> On Saturday, February 07, 2015 03:03:44 AM Francisco Jerez wrote:
>> Kenneth Graunke  writes:
>> 
>> > On Friday, February 06, 2015 07:23:16 PM Francisco Jerez wrote:
>> >> Reviewed-by: Paul Berry 
>> >> ---
>> >>  src/mesa/drivers/dri/i965/brw_context.h  | 5 +
>> >>  src/mesa/drivers/dri/i965/brw_shader.cpp | 7 +++
>> >>  2 files changed, 12 insertions(+)
>> >> 
>> >> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
>> >> b/src/mesa/drivers/dri/i965/brw_context.h
>> >> index bebb0be..e28c65d 100644
>> >> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> >> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> >> @@ -341,6 +341,7 @@ struct brw_stage_prog_data {
>> >>uint32_t gather_texture_start;
>> >>uint32_t ubo_start;
>> >>uint32_t abo_start;
>> >> +  uint32_t image_start;
>> >>uint32_t shader_time_start;
>> >>/** @} */
>> >> } binding_table;
>> >> @@ -621,6 +622,9 @@ struct brw_vs_prog_data {
>> >>  /** Max number of atomic counter buffer objects in a shader */
>> >>  #define BRW_MAX_ABO 16
>> >>  
>> >> +/** Max number of image units in a shader */
>> >> +#define BRW_MAX_IMAGES 16
>> >> +
>> >
>> > Out of curiosity, why 16?  The Windows driver and Nvidia both seem to
>> > use 8, while AMD offers 32:
>> >
>> > http://delphigl.de/glcapsviewer/gl_stats_caps_single.php?listreportsbycap=GL_MAX_IMAGE_UNITS
>> >
>> > We have 32 texture units on Haswell these days (though honestly, it's
>> > pretty arbitrary...we could have more if we wanted...)
>> >
>> > That's not an objection - I'm just curious.  As is,
>> > Reviewed-by: Kenneth Graunke 
>> >
>> It's completely arbitrary too, IIRC I set it to 16 because back when I
>> wrote this patch we were still exposing 16 texture units, so I just used
>> the same value.  We could definitely expose 32 image units too, should I
>> just increase the value to 32 locally?
>
> That seems like a good idea to me, unless you can think of some reason
> not to.  I'll leave it up to you :)

Cool, I've increased BRW_MAX_IMAGES to 32 locally.  The core mesa patch
I just sent will also be required so the GL context and shader objects
have enough state entries to keep track of all these image units. :)


pgpjQCpi4ch2a.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/util: Define ffsll on OpenBSD.

2015-02-09 Thread Jon TURNEY

On 06/02/2015 19:58, Matt Turner wrote:

On Fri, Feb 6, 2015 at 3:38 AM, Jonathan Gray  wrote:

OpenBSD has ffs in libc but does not have ffsll so use the compiler
builtin.  PIPE_OS_BSD isn't suitable here as FreeBSD has ffsll in libc.

Signed-off-by: Jonathan Gray 
---
  src/gallium/auxiliary/util/u_math.h | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_math.h 
b/src/gallium/auxiliary/util/u_math.h
index 5db5b66..ec282f3 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -531,6 +531,8 @@ unsigned ffs( unsigned u )
  #elif defined(__MINGW32__) || defined(PIPE_OS_ANDROID)
  #define ffs __builtin_ffs
  #define ffsll __builtin_ffsll
+#elif defined(__OpenBSD__)
+#define ffsll __builtin_ffsll
  #endif


Autoconf checks for presence of a bunch of builtins. Please use those
instead (in this case, HAVE___BUILTIN_FFSLL).


Yes, please.

This has just been 'fixed' for MinGW, now for OpenBSD, and also needs 
fixing for Cygwin.


Attached is a patch which attempts to do this using autoconf checks.

From 580eb16295a94012c488db7ac44d09cb3ca8ff55 Mon Sep 17 00:00:00 2001
From: Jon TURNEY 
Date: Sat, 7 Feb 2015 14:17:35 +
Subject: [PATCH] gallium/util: Use autoconf check for ffs() and ffsll()

u_math.h should probably explicitly include config.h, but it didn't before and
already relies on some HAVE_ defines, so I haven't added it.

This makes a subtle change on non-linux platforms: previously we were explicitly
using __builtin_ffs(), whereas now we are using ffs(), and the compiler might
decide to convert that to a builtin.  If we wanted to ensure the builtin was
used for some reason, this isn't quite right.

Since ffs() is used, we need to include  for the prototype, which I
think we expect to find on all POSIX systems, but not on MSVC.  Add an autoconf
check for that header and use that to conditionalize it's use.

Define _GNU_SOURCE on Cygwin so that the GNU extension ffsll() is prototyped.

I think I've put the right defines in the right place for the Android build
system, but I haven't build tested that.

Signed-off-by: Jon TURNEY 
---
 Android.common.mk   |  3 +++
 configure.ac|  6 +-
 src/gallium/auxiliary/util/u_math.h | 12 ++--
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/Android.common.mk b/Android.common.mk
index 3e6d4c3..7719096 100644
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -42,6 +42,9 @@ LOCAL_CFLAGS += \
 
 LOCAL_CFLAGS += \
-DHAVE_PTHREAD=1 \
+   -DHAVE_STRINGS_H \
+   -DHAVE___BUILTIN_FFS \
+   -DHAVE___BUILTIN_FFSLL \
-fvisibility=hidden \
-Wno-sign-compare
 
diff --git a/configure.ac b/configure.ac
index 7c2692e..cd4d7ed 100644
--- a/configure.ac
+++ b/configure.ac
@@ -172,6 +172,10 @@ if test "x$GCC" = xyes -a "x$acv_mesa_CLANG" = xno; then
 fi
 fi
 
+dnl Check for ffs and ffsll
+AC_CHECK_HEADER([strings.h])
+AC_CHECK_FUNCS([ffs ffsll])
+
 dnl Check for compiler builtins
 AX_GCC_BUILTIN([__builtin_bswap32])
 AX_GCC_BUILTIN([__builtin_bswap64])
@@ -219,7 +223,7 @@ solaris*)
 DEFINES="$DEFINES -DSVR4"
 ;;
 cygwin*)
-DEFINES="$DEFINES -D_XOPEN_SOURCE=700"
+DEFINES="$DEFINES -D_GNU_SOURCE"
 ;;
 esac
 
diff --git a/src/gallium/auxiliary/util/u_math.h 
b/src/gallium/auxiliary/util/u_math.h
index 5db5b66..45587fb 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -52,7 +52,7 @@ extern "C" {
 #include 
 #include 
 
-#ifdef PIPE_OS_UNIX
+#ifdef HAVE_STRINGS_H
 #include  /* for ffs */
 #endif
 
@@ -528,10 +528,18 @@ unsigned ffs( unsigned u )
 
return i;
 }
-#elif defined(__MINGW32__) || defined(PIPE_OS_ANDROID)
+#else
+#ifndef HAVE_FFS
+#ifdef HAVE___BUILTIN_FFS
 #define ffs __builtin_ffs
+#endif
+#endif
+#ifndef HAVE_FFSLL
+#ifdef HAVE___BUILTIN_FFSLL
 #define ffsll __builtin_ffsll
 #endif
+#endif
+#endif
 
 #endif /* FFS_DEFINED */
 
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Rename mesa/src/util (Was: gallium/util: add u_bit_scan64)

2015-02-09 Thread Roland Scheidegger
FWIW using a different name for such helpers in src/util looks good to
me (and I'm pretty indifferent to the actual name). We already have
imports from mesa/main crept in there which need fixing and we certainly
don't want to make the same mistake with gallium includes.

Roland

Am 08.02.2015 um 21:08 schrieb Jose Fonseca:
> As I said earlier on this thread, it's not that simple: u_math.c depends on 
> u_cpu_detect.c and more.  I wish it was a matter of merely moving u_math.[ch] 
> to src/util, but it's not.
> 
> And because I don't believe I have the time to untangle u_math.[ch] from 
> everything else, I'm restrictied to more incremental solutions.  That is, add 
> a new header to src/util with an unique name, and cherry-pick bits and pieces 
> from u_math.[ch] as deemed necessary/practical.
> 
> Jose
> 
> 
> 
> From: Marek Olšák 
> Sent: 08 February 2015 11:27
> To: Jose Fonseca
> Cc: Matt Turner; Emil Velikov; ML mesa-dev
> Subject: Re: [Mesa-dev] Rename mesa/src/util (Was: gallium/util: add 
> u_bit_scan64)
> 
> I kind of like the "util_" prefix everywhere. u_math only depends on
> p_config.h and p_compiler.h. I don't think it would be hard to move
> those two into src/util as well. We have always wanted Mesa to use
> more of Gallium. This might be a good start.
> 
> Just my 2 cents.
> 
> Marek
> 
> On Sat, Feb 7, 2015 at 3:46 PM, Jose Fonseca  wrote:
>> On 07/02/15 00:10, Matt Turner wrote:
>>>
>>> On Fri, Feb 6, 2015 at 3:58 PM, Emil Velikov 
>>> wrote:
>
> "util" is meant to be for shared utility across the entire code base -
> both Mesa and Gallium.  It's been growing slowly as people move things
> there.  It might make sense to move a lot of src/gallium/auxiliary/util
> there, in fact - there's always been a lot of duplication between Mesa
> and Gallium's utility code.  But that's up to the Gallium developers.
>
 Imho currently the util library is growing on the basis of "we can
 share X let's throw it in there" rather than putting much thought
 about the structure/"design" of it - unlike in gallium.
>>>
>>>
>>> Are you serious? Let's be honest with ourselves. I probably would have
>>> been a better plan to not put commonly useful code deep in Gallium in
>>> the first place.
>>
>>
>> Historic reasons, as Brian explained.  Gallium was supposed to become a
>> dependency of Mesa but it didn't panned out.
>>
>>> Apparently this is what I get for trying to do the right thing an pull
>>> the atomics code out into a place the rest of the Mesa project can use
>>> it.
>>
>>
>> I really appreciate you went the extra mile there.  And for me it's way more
>> important that we start sharing code than the naming structure.
>>
>> Especially when naming is subject to test/style whereas code reuse is
>> something everybody can readily agree on.
>>
>> If the outcome of this email thread would be to dicentivate you to share
>> more code, then that would be worst outcome indeed.
>>
>> Anyway, let's get out of this criticism spiral, and instead focus on how we
>> can solve the issues to everybody's satisfaction.
>>
>>> How about instead of an annoying bikeshed thread we just finish moving
>>> bits of Gallium's util directory to src/util and be done with it?
>>
>>
>> If renaming src/util is not something we can agree fine.  Let's forget about
>> it.
>>
>>
>> But I don't think I (or anybody) has the time to move
>> src/gallium/auxiliary/util to src/util in one go.  The code is entangled
>> with src/gallium/include .
>>
>> That is, moving the whole src/gaullium/auxiliary/util to src/util equals to
>> add gallium as dependency to whole mesa.  If that's OK, then I agree with
>> Brian's suggestion: might as well do that (leave util in
>> src/gallium/axuliary ) and add src/gallium/* as includes/dependency
>> everwhere.
>>
>> I think for Mesa (src/mesa) this is fine.  I'm not sure about src/glsl.
>>
>> Again, I suspect this won't be something we'll agree neither.
>>
>>
>>
>> So I'm back to the beginning: I want to move some math helpers from
>> src/gallium/auxiliary/util/u_math to somewhere inside src/util.  I need
>> _some_ name: cgrt_*.h is no good, math.h would collide with standard C
>> headers, u_math.h would collide with src/gallium/auxiliary/util, so it must
>> be something else.  I'm open to suggestions.  If none I'll go with
>> mathhelpers.h
>>
>>
>>
>> Jose
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=AwIBaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=zfmBZnnVGHeYde45pMKNnVyzeaZbdIqVLprmZCM2zzE&m=sAgl8jEh0WnJsnslLG59t6U2AbWzLAmPO3N7rrA9YPE&s=L3d9gFZyZkC0vZ0eYA6Bl3TFEihZ0Btt56_Hl3iTvS0&e=
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedeskt

[Mesa-dev] [PATCH] i965: Don't tile 1D miptrees.

2015-02-09 Thread Francisco Jerez
It doesn't really improve locality of texture fetches, quite the
opposite it's a waste of memory bandwidth and space due to tile
alignment.

v2: Check mt->logical_height0 instead of mt->target (Ken).  Add short
comment explaining why they shouldn't be tiled.
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 64752dd..0e3888f 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -488,6 +488,13 @@ intel_miptree_choose_tiling(struct brw_context *brw,
base_format == GL_DEPTH_STENCIL_EXT)
   return I915_TILING_Y;
 
+   /* 1D textures (and 1D array textures) don't get any benefit from tiling,
+* in fact it leads to a less efficient use of memory space and bandwidth
+* due to tile alignment.
+*/
+   if (mt->logical_height0 == 1)
+  return I915_TILING_NONE;
+
int minimum_pitch = mt->total_width * mt->cpp;
 
/* If the width is much smaller than a tile, don't bother tiling. */
-- 
2.1.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 14/14] i965/gen7-8: Implement glMemoryBarrier().

2015-02-09 Thread Francisco Jerez
Kristian Høgsberg  writes:

> On Fri, Feb 6, 2015 at 9:23 AM, Francisco Jerez  wrote:
>> ---
>>  src/mesa/drivers/dri/i965/brw_program.c | 40 
>> +
>>  src/mesa/drivers/dri/i965/intel_reg.h   |  1 +
>>  2 files changed, 41 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
>> b/src/mesa/drivers/dri/i965/brw_program.c
>> index d9a3f05..793d963 100644
>> --- a/src/mesa/drivers/dri/i965/brw_program.c
>> +++ b/src/mesa/drivers/dri/i965/brw_program.c
>> @@ -44,6 +44,7 @@
>>  #include "brw_context.h"
>>  #include "brw_shader.h"
>>  #include "brw_wm.h"
>> +#include "intel_batchbuffer.h"
>>
>>  static unsigned
>>  get_new_program_id(struct intel_screen *screen)
>> @@ -179,6 +180,43 @@ brwProgramStringNotify(struct gl_context *ctx,
>> return true;
>>  }
>>
>> +static void
>> +brwMemoryBarrier(struct gl_context *ctx, GLbitfield barriers)
>> +{
>> +   struct brw_context *brw = brw_context(ctx);
>> +   unsigned bits = (PIPE_CONTROL_DATA_CACHE_INVALIDATE |
>> +PIPE_CONTROL_NO_WRITE |
>> +PIPE_CONTROL_CS_STALL);
>> +   assert(brw->gen >= 7 && brw->gen <= 8);
>> +
>> +   if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
>> +   GL_ELEMENT_ARRAY_BARRIER_BIT |
>> +   GL_COMMAND_BARRIER_BIT))
>> +  bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
>> +
>> +   if (barriers & GL_UNIFORM_BARRIER_BIT)
>> +  bits |= (PIPE_CONTROL_TC_FLUSH |
>> +   PIPE_CONTROL_CONST_CACHE_INVALIDATE);
>> +
>> +   if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
>> +  bits |= PIPE_CONTROL_TC_FLUSH;
>> +
>> +   if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT)
>> +  bits |= PIPE_CONTROL_WRITE_FLUSH;
>> +
>> +   if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
>> +  bits |= (PIPE_CONTROL_DEPTH_CACHE_FLUSH |
>> +   PIPE_CONTROL_WRITE_FLUSH);
>> +
>> +   /* Typed surface messages are handled by the render cache on IVB, so we
>> +* need to flush it too.
>> +*/
>> +   if (brw->gen == 7 && !brw->is_haswell)
>> +  bits |= PIPE_CONTROL_WRITE_FLUSH;
>> +
>> +   brw_emit_pipe_control_flush(brw, bits);
>> +}
>> +
>>  void
>>  brw_add_texrect_params(struct gl_program *prog)
>>  {
>> @@ -236,6 +274,8 @@ void brwInitFragProgFuncs( struct dd_function_table 
>> *functions )
>>
>> functions->NewShader = brw_new_shader;
>> functions->LinkShader = brw_link_shader;
>> +
>> +   functions->MemoryBarrier = brwMemoryBarrier;
>
> Just a drive-by nitpick: brw_memory_barrier?
>
I'm OK with that, I've changed it locally.  I used camel case because it
seemed like all the other dd hooks implemented in the same file used it.
Apparently all other functions defined in it don't, so I guess it's
going to be inconsistent anyway.

> Kristian
>
>>  }
>>
>>  void
>> diff --git a/src/mesa/drivers/dri/i965/intel_reg.h 
>> b/src/mesa/drivers/dri/i965/intel_reg.h
>> index 5ac0180..8b630c5 100644
>> --- a/src/mesa/drivers/dri/i965/intel_reg.h
>> +++ b/src/mesa/drivers/dri/i965/intel_reg.h
>> @@ -70,6 +70,7 @@
>>  #define PIPE_CONTROL_ISP_DIS   (1 << 9)
>>  #define PIPE_CONTROL_INTERRUPT_ENABLE  (1 << 8)
>>  /* GT */
>> +#define PIPE_CONTROL_DATA_CACHE_INVALIDATE (1 << 5)
>>  #define PIPE_CONTROL_VF_CACHE_INVALIDATE   (1 << 4)
>>  #define PIPE_CONTROL_CONST_CACHE_INVALIDATE(1 << 3)
>>  #define PIPE_CONTROL_STATE_CACHE_INVALIDATE(1 << 2)
>> --
>> 2.1.3
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


pgpQcke_WQmt0.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] nesa-10.4.4: gallivm/lp_bld_misc.cpp:503:38: error: no viable conversion from 'ShaderMemoryManager *' to 'std::unique_ptr'

2015-02-09 Thread Emil Velikov
On 07/02/15 21:44, Sedat Dilek wrote:
> Hi,
> 
> I was building mesa v10.4.4 with my llvm-toolchain v3.6.0rc2.
> 
> My build breaks like this...
> 
> ...
> 
> Please cherry-pick...
> 
> commit ef7e0b39a24966526b102643523feac765771842
> "gallivm: Update for RTDyldMemoryManager becoming an unique_ptr."
> 
> ..for mesa 10.4 Git branch.
> 
Hi Sedat,

Picking a fix in a stable branch against a non-final release sounds like
a no-go in our books. As the official llvm 3.6 rolls out we'll pick this
fix for the stable branches - until then I would recommend (a) applying
it locally or (b) using mesa from the 10.5 or master branch.

Thanks
Emil

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] mesa-10.4.4: BROKEN TLS support in GLX with llvm-toolchain v3.6.0rc2

2015-02-09 Thread Emil Velikov
Hi Sedat,

On 07/02/15 22:42, Sedat Dilek wrote:
> [ Please CC me I am not subscribed to mesa-dev and llvmdev MLs ]
> 
> Hi,
> 
> I already reported this when playing 1st time with my llvm-toolchain
> v3.6.0rc2 and mesa v10.3.7 [1].
> The issue still remains in mesa v10.4.4.
> 
> So, this is a field test to see if LLVM/Clang v3.6.0rc2 fits my needs.
> 
> I see the following build-error...
> ...
> 
> make[4]: Entering directory `/home/wearefam/src/mesa/mesa-git/src/mapi'
>   CC shared_glapi_libglapi_la-entry.lo
> clang version 3.6.0 (tags/RELEASE_360/rc2)
> Target: x86_64-unknown-linux-gnu
> Thread model: posix
> Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.6
> Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.6.4
> Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9
> Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9.2
> Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9
> Candidate multilib: .;@m64
> Candidate multilib: 32;@m32
> Selected multilib: .;@m64
>  "/opt/llvm-toolchain-3.6.0rc2/bin/clang" -cc1 -triple
> x86_64-unknown-linux-gnu -emit-obj -mrelax-all -disable-free
> -main-file-name entry.c -mrelocation-model static -mthread-model posix
> -mdisable-fp-elim -relaxed-aliasing -fmath-errno -masm-verbose
> -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu
> x86-64 -target-linker-version 2.22 -v -g -dwarf-column-info
> -coverage-file /home/wearefam/src/mesa/mesa-git/src/mapi/entry.c
> -resource-dir /opt/llvm-toolchain-3.6.0rc2/bin/../lib/clang/3.6.0
> -dependency-file .deps/shared_glapi_libglapi_la-entry.Tpo
> -sys-header-deps -MP -MT shared_glapi_libglapi_la-entry.lo -D
> "PACKAGE_NAME=\"Mesa\"" -D "PACKAGE_TARNAME=\"mesa\"" -D
> "PACKAGE_VERSION=\"10.4.4\"" -D "PACKAGE_STRING=\"Mesa 10.4.4\"" -D
> "PACKAGE_BUGREPORT=\"https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa\"";
> -D "PACKAGE_URL=\"\"" -D "PACKAGE=\"mesa\"" -D "VERSION=\"10.4.4\"" -D
> STDC_HEADERS=1 -D HAVE_SYS_TYPES_H=1 -D HAVE_SYS_STAT_H=1 -D
> HAVE_STDLIB_H=1 -D HAVE_STRING_H=1 -D HAVE_MEMORY_H=1 -D
> HAVE_STRINGS_H=1 -D HAVE_INTTYPES_H=1 -D HAVE_STDINT_H=1 -D
> HAVE_UNISTD_H=1 -D HAVE_DLFCN_H=1 -D "LT_OBJDIR=\".libs/\"" -D
> YYTEXT_POINTER=1 -D HAVE___BUILTIN_BSWAP32=1 -D
> HAVE___BUILTIN_BSWAP64=1 -D HAVE___BUILTIN_CLZ=1 -D
> HAVE___BUILTIN_CLZLL=1 -D HAVE___BUILTIN_CTZ=1 -D
> HAVE___BUILTIN_EXPECT=1 -D HAVE___BUILTIN_FFS=1 -D
> HAVE___BUILTIN_FFSLL=1 -D HAVE___BUILTIN_POPCOUNT=1 -D
> HAVE___BUILTIN_POPCOUNTLL=1 -D HAVE___BUILTIN_UNREACHABLE=1 -D
> HAVE_DLADDR=1 -D HAVE_PTHREAD=1 -D HAVE_LIBEXPAT=1 -D
> USE_EXTERNAL_DXTN_LIB=1 -D _GNU_SOURCE -D USE_SSE41 -D DEBUG -D
> USE_X86_64_ASM -D HAVE_XLOCALE_H -D HAVE_STRTOF -D HAVE_DLOPEN -D
> HAVE_POSIX_MEMALIGN -D HAVE_LIBDRM -D GLX_USE_DRM -D HAVE_LIBUDEV -D
> GLX_INDIRECT_RENDERING -D GLX_DIRECT_RENDERING -D GLX_USE_TLS -D
> HAVE_ALIAS -D HAVE_MINCORE -D HAVE_LLVM=0x0306 -D LLVM_VERSION_PATCH=0
> -D MAPI_MODE_GLAPI -D
> "MAPI_ABI_HEADER=\"shared-glapi/glapi_mapi_tmp.h\"" -I . -I
> ../../include -I ../../src/mapi -I ../../src/mapi -I /opt/xorg/include
> -internal-isystem /usr/local/include -internal-isystem
> /opt/llvm-toolchain-3.6.0rc2/bin/../lib/clang/3.6.0/include
> -internal-externc-isystem /usr/include/x86_64-linux-gnu
> -internal-externc-isystem /include -internal-externc-isystem
> /usr/include -O0 -Wall -Werror=implicit-function-declaration
> -Werror=missing-prototypes -std=c99 -fdebug-compilation-dir
> /home/wearefam/src/mesa/mesa-git/src/mapi -ferror-limit 19
> -fmessage-length 0 -pthread -mstackrealign -fobjc-runtime=gcc
> -fdiagnostics-show-option -o entry.o -x c ../../src/mapi/entry.c
> clang -cc1 version 3.6.0 based upon LLVM 3.6.0 default target
> x86_64-unknown-linux-gnu
> ignoring nonexistent directory "/include"
> ignoring duplicate directory "."
> ignoring duplicate directory "."
> #include "..." search starts here:
> #include <...> search starts here:
>  .
>  ../../include
>  /opt/xorg/include
>  /usr/local/include
>  /opt/llvm-toolchain-3.6.0rc2/bin/../lib/clang/3.6.0/include
>  /usr/include/x86_64-linux-gnu
>  /usr/include
> End of search list.
> In file included from ../../src/mapi/entry.c:49:
> ./entry_x86-64_tls.h:66:1: warning: tentative array definition assumed
> to have one element
> x86_64_entry_start[];
> ^
> fatal error: error in backend: symbol 'x86_64_entry_start' is already defined
> clang: error: clang frontend command failed with exit code 70 (use -v
> to see invocation)
> clang version 3.6.0 (tags/RELEASE_360/rc2)
> Target: x86_64-unknown-linux-gnu
> Thread model: posix
> clang: note: diagnostic msg: PLEASE submit a bug report to
> http://llvm.org/bugs/ and include the crash backtrace, preprocessed
> source, and associated run script.
> clang: note: diagnostic msg:
> 
> 
> PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:
> Preprocessed source(s) and associated run script(s) are located at:
> clang: note: diagnostic msg: /tmp/

Re: [Mesa-dev] [PATCH 11/14] i965/gen7: Enable fragment shader dispatch if the program has image uniforms.

2015-02-09 Thread Francisco Jerez
Kenneth Graunke  writes:

> On Friday, February 06, 2015 07:23:25 PM Francisco Jerez wrote:
>> Shaders with image uniforms may have side effects.  Make sure that
>> fragment shader threads are dispatched if the shader has any image
>> uniforms.
>> ---
>>  src/mesa/drivers/dri/i965/gen7_wm_state.c | 6 +-
>>  1 file changed, 5 insertions(+), 1 deletion(-)
>> 
>> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c 
>> b/src/mesa/drivers/dri/i965/gen7_wm_state.c
>> index 923414e..034ce08 100644
>> --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
>> @@ -39,6 +39,9 @@ upload_wm_state(struct brw_context *brw)
>> /* BRW_NEW_FRAGMENT_PROGRAM */
>> const struct brw_fragment_program *fp =
>>brw_fragment_program_const(brw->fragment_program);
>> +   struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
>> +   struct gl_shader *shader =
>> +  prog ? prog->_LinkedShaders[MESA_SHADER_FRAGMENT] : NULL;
>> /* BRW_NEW_FS_PROG_DATA */
>> const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
>> bool writes_depth = prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;
>> @@ -76,8 +79,9 @@ upload_wm_state(struct brw_context *brw)
>>dw1 |= GEN7_WM_KILL_ENABLE;
>> }
>>  
>> -   /* _NEW_BUFFERS | _NEW_COLOR */
>> +   /* _NEW_BUFFERS | _NEW_COLOR, BRW_NEW_FRAGMENT_PROGRAM */
>> if (brw_color_buffer_write_enabled(brw) || writes_depth ||
>> +   (shader && shader->NumImages) ||
>> dw1 & GEN7_WM_KILL_ENABLE) {
>>dw1 |= GEN7_WM_DISPATCH_ENABLE;
>> }
>> 
>
> Do you need to make a similar change in gen8_ps_state.c?  I suppose it
> could automatically get enabled when UAVs are on, but I haven't checked.
>

Nope, it's not necessary.  There is no dispatch enable bit on Gen8,
instead it's automatically calculated based on a number of things
including the UAV access bits I'm setting in PATCH 12.

> I've been trying to migrate the state upload code to exclusively rely on
> prog_data, and not poke at the core Mesa shader structures directly
> (and only listen to BRW_NEW_FS_PROG_DATA, not BRW_NEW_FRAGMENT_PROGRAM).
>
Okay, I'll fix that.

> What do you think about adding a "uses_images" or "has_uav" flag to
> prog_data and using it here?
>
I think we could just check prog_data->nr_image_params which is already
there.

> While I think I'd prefer that, this should also work, so it gets:
> Reviewed-by: Kenneth Graunke 

Thanks!


pgpBgeIxeD0GY.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Don't tile 1D miptrees.

2015-02-09 Thread Neil Roberts
Looks good to me.

Reviewed-by: Neil Roberts 

- Neil

Francisco Jerez  writes:

> It doesn't really improve locality of texture fetches, quite the
> opposite it's a waste of memory bandwidth and space due to tile
> alignment.
>
> v2: Check mt->logical_height0 instead of mt->target (Ken).  Add short
> comment explaining why they shouldn't be tiled.
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 64752dd..0e3888f 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -488,6 +488,13 @@ intel_miptree_choose_tiling(struct brw_context *brw,
> base_format == GL_DEPTH_STENCIL_EXT)
>return I915_TILING_Y;
>  
> +   /* 1D textures (and 1D array textures) don't get any benefit from tiling,
> +* in fact it leads to a less efficient use of memory space and bandwidth
> +* due to tile alignment.
> +*/
> +   if (mt->logical_height0 == 1)
> +  return I915_TILING_NONE;
> +
> int minimum_pitch = mt->total_width * mt->cpp;
>  
> /* If the width is much smaller than a tile, don't bother tiling. */
> -- 
> 2.1.3


pgpW186qYlh05.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/skl: Implement WaDisable1DDepthStencil

2015-02-09 Thread Neil Roberts
Skylake+ doesn't support setting a depth buffer to a 1D surface but it
does allow pretending it's a 2D texture with a height of 1 instead.

This fixes the GL_DEPTH_COMPONENT_* tests of the copyteximage piglit
test (and also seems to avoid a subsequent GPU hang).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89037
---
This probably only makes sense on top of Francisco Jerez's patch here:

http://lists.freedesktop.org/archives/mesa-dev/2015-February/076392.html

The copyteximage test is still failing with some other formats such as
GL_RGB16 and some intensity and luminance formats but I'm just looking
into that now.

 src/mesa/drivers/dri/i965/gen8_depth_state.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c 
b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index e428089..b4eb6e1 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -190,6 +190,18 @@ gen8_emit_depth_stencil_hiz(struct brw_context *brw,
case GL_TEXTURE_3D:
   assert(mt);
   depth = MAX2(mt->logical_depth0, 1);
+  surftype = translate_tex_target(gl_target);
+  break;
+   case GL_TEXTURE_1D_ARRAY:
+   case GL_TEXTURE_1D:
+  if (brw->gen >= 9) {
+ /* WaDisable1DDepthStencil. Skylake+ doesn't support 1D depth
+  * textures but it does allow pretending it's a 2D texture
+  * instead.
+  */
+ surftype = BRW_SURFACE_2D;
+ break;
+  }
   /* fallthrough */
default:
   surftype = translate_tex_target(gl_target);
-- 
1.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] util/u_atomic: Add new macro p_atomic_add

2015-02-09 Thread Carl Worth
On Mon, Feb 09 2015, Jose Fonseca wrote:
> Just one more tweak to InterlockedExchangeAdd64 as per patch attached. 
..
> With that u_test_atomic builds and passes for me both on 32 and
> 64bits.

Excellent. Thanks for the fix and for the testing report.

> Sorry for the delay. And thanks for your help in keeping MSVC support on 
> par.

No problem with the delay. The timing is just right since I'm about to
send to the list an updated cache implementation that relies on this new
atomic_add, and that also includes some unit tests that I'm hoping
people can help test.

(But, fair warning, there's a bit of file-system manipulation code there
that I think will probably need some attention to work on non-Linux
systems.)

-Carl


pgppFCoguqri0.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Rename the CEILING() macro to DIV_ROUND_UP().

2015-02-09 Thread Matt Turner
Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/fs: Fix fs_inst::regs_written calculation for instructions with scalar dst.

2015-02-09 Thread Matt Turner
Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] glsl: Making logical operations per-component?

2015-02-09 Thread Ian Romanick
On 02/08/2015 04:10 PM, Matt Turner wrote:
> The GLSL IR ir_binop_logic_* operations match the source language
> operators in that they only operate on scalars.
> 
> In talking to Ilia, I realized that the vectorizer pass doesn't know
> about that, and so it will happily vectorize the vertex shader in the
> piglit test below.
> 
> The i965 driver emits perfectly fine code for this and passes the
> test, but ir_validate asserts that the operands to && are scalar
> booleans.
> 
> Should we relax this restriction and let the logical and/or/xor/not
> operations operate per-component? Perhaps a small GLSL extension
> codifying this and allowing it in the source language should be in
> order?

That sounds reasonable enough.  It would just be changing a restriction
specific to the IR, and it should affect the behavior observed by
applications.

I agree with Connor.  Adding and, or, and xor functions similar to the
existing not and comparitor functions would be the right way to expose this.

> Sounds like something like this would simplify some code Ilia's working on.
> 
> 
> [require]
> GLSL >= 1.30
> 
> [vertex shader]
> in vec4 vertex;
> out vec4 color;
> 
> uniform bvec4 a, b;
> 
> void main() {
> gl_Position = vertex;
> 
> color.x = float(a.x && b.x);
> color.y = float(a.y && b.y);
> color.z = float(a.z && b.z);
> color.w = float(a.w && b.w);
> }
> 
> [fragment shader]
> in vec4 color;
> out vec4 frag_color;
> 
> void main()
> {
> frag_color = color;
> }
> 
> [vertex data]
> vertex/float/2
> -1.0 -1.0
>  1.0 -1.0
>  1.0  1.0
> -1.0  1.0
> 
> [test]
> uniform ivec4 a 1 1 1 1
> uniform ivec4 b 1 0 0 1
> draw arrays GL_TRIANGLE_FAN 0 4
> probe all rgba 1.0 0.0 0.0 1.0
> 
`
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 09/28] glsl/ir: Add builtin function support for doubles

2015-02-09 Thread Ian Romanick
On 02/07/2015 05:25 PM, Matt Turner wrote:
> On Sat, Feb 7, 2015 at 5:16 PM, Ilia Mirkin  wrote:
>> On Sat, Feb 7, 2015 at 8:10 PM, Ilia Mirkin  wrote:
> diff --git a/src/glsl/ir.h b/src/glsl/ir.h
> index a0f48b2..6e7c654 100644
> --- a/src/glsl/ir.h
> +++ b/src/glsl/ir.h
> @@ -1275,6 +1275,13 @@ enum ir_expression_operation {
> ir_unop_bitcast_f2u, /**< Bit-identical float-to-uint "conversion" */
> ir_unop_any,
>
> +   ir_unop_d2f, /**< Double-to-float conversion. */
> +   ir_unop_f2d, /**< Float-to-double conversion. */
> +   ir_unop_d2i, /**< Double-to-integer conversion. */
> +   ir_unop_i2d, /**< Integer-to-double conversion. */
> +   ir_unop_d2u, /**< Double-to-unsigned conversion. */
> +   ir_unop_u2d, /**< Unsigned-to-double conversion. */
> +

 These should be properly ordered with the other cast operations.  Also,
 ir_unop_d2b is missing.
>>>
>>> Blast! You're right. I thought we were OK with d2f + f2b but... I
>>> guess not. I'll add it in :(
>>
>> BTW, I don't see a logic to the current cast operations' order, so I'm
>> going to leave these as-is (but add d2b in... b2d is actually probably
>> unnecessary).

Right... f2d(b2f(x)) should be fine.

> I'd at least put them above the bitcast operations and any with the
> other similar things like f2u.

Yeah... I wasn't completely clear, but that's what I was thinking too.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCHv2 11/14] i965/gen7: Enable fragment shader dispatch if the program has image uniforms.

2015-02-09 Thread Francisco Jerez
Shaders with image uniforms may have side effects.  Make sure that
fragment shader threads are dispatched if the shader has any image
uniforms.

v2: Use brw_stage_state::nr_image_params to find out if the shader has
image uniforms instead of checking core mesa data structures (Ken).
---
 src/mesa/drivers/dri/i965/gen7_wm_state.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index 923414e..e426b30 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -78,6 +78,7 @@ upload_wm_state(struct brw_context *brw)
 
/* _NEW_BUFFERS | _NEW_COLOR */
if (brw_color_buffer_write_enabled(brw) || writes_depth ||
+   brw->wm.base.nr_image_params ||
dw1 & GEN7_WM_KILL_ENABLE) {
   dw1 |= GEN7_WM_DISPATCH_ENABLE;
}
-- 
2.1.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 12/28] glsl/ir: Add builtin constant function support for doubles

2015-02-09 Thread Ian Romanick
On 02/07/2015 06:25 PM, Ilia Mirkin wrote:
> On Fri, Feb 6, 2015 at 3:02 AM, Ian Romanick  wrote:
>>> @@ -708,6 +763,9 @@ ir_expression::constant_expression_value(struct 
>>> hash_table *variable_context)
>>>case GLSL_TYPE_FLOAT:
>>>   data.f[c] = op[0]->value.f[c] - floor(op[0]->value.f[c]);
>>>   break;
>>> +  case GLSL_TYPE_DOUBLE:
>>> + data.d[c] = op[0]->value.d[c] - floor(op[0]->value.d[c]);
>>> + break;
>>>default:
>>>   assert(0);
>>
>> Maybe follow-up by replacing a bunch of these with unreachable().
> 
> That seems highly unrelated to this series. Good newbie project though
> -- audit all the assert(const)'s and convert into unreachable as
> necessary. [I think there's basically no case where assert(const) is
> right, in light of unreachable existing...]

It's related in that near by code is being touched.  That makes it fresh
in the minds of reviewers.  It's not a big deal either way.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCHv2 12/14] i965/gen7-8: Poke the 3DSTATE UAV access enable bits.

2015-02-09 Thread Francisco Jerez
v2: Set the PS UAV-only bit on HSW (Ken).
---
 src/mesa/drivers/dri/i965/brw_defines.h   |  4 
 src/mesa/drivers/dri/i965/gen7_gs_state.c |  4 +++-
 src/mesa/drivers/dri/i965/gen7_vs_state.c | 13 -
 src/mesa/drivers/dri/i965/gen7_wm_state.c |  9 +
 src/mesa/drivers/dri/i965/gen8_gs_state.c |  4 +++-
 src/mesa/drivers/dri/i965/gen8_ps_state.c |  3 +++
 src/mesa/drivers/dri/i965/gen8_vs_state.c |  4 +++-
 7 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index fe255cc..d4648ee 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1691,6 +1691,7 @@ enum brw_message_target {
 # define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT   18
 # define GEN6_VS_FLOATING_POINT_MODE_IEEE_754  (0 << 16)
 # define GEN6_VS_FLOATING_POINT_MODE_ALT   (1 << 16)
+# define HSW_VS_UAV_ACCESS_ENABLE   (1 << 12)
 /* DW4 */
 # define GEN6_VS_DISPATCH_START_GRF_SHIFT  20
 # define GEN6_VS_URB_READ_LENGTH_SHIFT 11
@@ -1716,6 +1717,7 @@ enum brw_message_target {
 # define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT   18
 # define GEN6_GS_FLOATING_POINT_MODE_IEEE_754  (0 << 16)
 # define GEN6_GS_FLOATING_POINT_MODE_ALT   (1 << 16)
+# define HSW_GS_UAV_ACCESS_ENABLE  (1 << 12)
 /* DW4 */
 # define GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT  23
 # define GEN7_GS_OUTPUT_TOPOLOGY_SHIFT 17
@@ -2242,6 +2244,7 @@ enum brw_wm_barycentric_interp_mode {
 /* DW2 */
 # define GEN7_WM_MSDISPMODE_PERSAMPLE  (0 << 31)
 # define GEN7_WM_MSDISPMODE_PERPIXEL   (1 << 31)
+# define HSW_WM_UAV_ONLY(1 << 30)
 
 #define _3DSTATE_PS0x7820 /* GEN7+ */
 /* DW1: kernel pointer */
@@ -2264,6 +2267,7 @@ enum brw_wm_barycentric_interp_mode {
 # define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE   (1 << 8)
 # define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE  (1 << 7)
 # define GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE  (1 << 6)
+# define HSW_PS_UAV_ACCESS_ENABLE  (1 << 5)
 # define GEN7_PS_POSOFFSET_NONE(0 << 3)
 # define GEN7_PS_POSOFFSET_CENTROID(2 << 3)
 # define GEN7_PS_POSOFFSET_SAMPLE  (3 << 3)
diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c 
b/src/mesa/drivers/dri/i965/gen7_gs_state.c
index e1c4f8b..39e5201 100644
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -59,7 +59,9 @@ upload_gs_state(struct brw_context *brw)
   OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
  GEN6_GS_SAMPLER_COUNT_SHIFT) |
 ((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) <<
- GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+(brw->is_haswell && stage_state->nr_image_params ?
+ HSW_GS_UAV_ACCESS_ENABLE : 0));
 
   if (brw->gs.prog_data->base.base.total_scratch) {
  OUT_RELOC(stage_state->scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c 
b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 0e9b4fe..4741b78 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -77,6 +77,7 @@ upload_vs_state(struct brw_context *brw)
uint32_t floating_point_mode = 0;
const int max_threads_shift = brw->is_haswell ?
   HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT;
+   const struct brw_vue_prog_data *prog_data = &brw->vs.prog_data->base;
 
if (!brw->is_haswell && !brw->is_baytrail)
   gen7_emit_vs_workaround_flush(brw);
@@ -91,19 +92,21 @@ upload_vs_state(struct brw_context *brw)
 ((ALIGN(stage_state->sampler_count, 4)/4) <<
   GEN6_VS_SAMPLER_COUNT_SHIFT) |
  ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
-  GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+  GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+ (brw->is_haswell && stage_state->nr_image_params ?
+  HSW_VS_UAV_ACCESS_ENABLE : 0));
 
-   if (brw->vs.prog_data->base.base.total_scratch) {
+   if (prog_data->base.total_scratch) {
   OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-   ffs(brw->vs.prog_data->base.base.total_scratch) - 11);
+   ffs(prog_data->base.total_scratch) - 11);
} else {
   OUT_BATCH(0);
}
 
-   OUT_BATCH((brw->vs.prog_data->base.base.dispatch_grf_start_reg <<
+   OUT_BATCH((prog_data->base.dispatch_grf_start_reg <<
   GEN6_VS_DISPATCH_START_GRF_SHIFT) |
-(brw->vs.prog_data->base.urb_read_length << 
GEN6_VS_URB_READ_LENGTH_SHIFT) |
+   

Re: [Mesa-dev] [PATCHv2 32/32] i965: Don't compact instructions with unmapped bits.

2015-02-09 Thread Matt Turner
On Mon, Feb 9, 2015 at 6:08 AM, Francisco Jerez  wrote:
> Some instruction bits don't have a mapping defined to any compacted
> instruction field.  If they're ever set and we end up compacting the
> instruction they will be forced to zero.  Avoid using compaction in such
> cases.
>
> v2: Align multiple lines of an expression to the same column.  Change
> conditional compaction of 3-source instructions to an
> assertion. (Matt)
> ---
>  src/mesa/drivers/dri/i965/brw_eu_compact.c | 46 
> ++
>  1 file changed, 46 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
> b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> index 8e33bcb..f80bcc1 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> @@ -843,10 +843,53 @@ set_3src_source_index(struct brw_context *brw, 
> brw_compact_inst *dst, brw_inst *
>  }
>
>  static bool
> +has_unmapped_bits(struct brw_context *brw, brw_inst *src)
> +{
> +   /* Check for instruction bits that don't map to any of the fields of the
> +* compacted instruction.  The instruction cannot be compacted if any of
> +* them are set.  They overlap with:
> +*  - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
> +*  - Dst.AddrImm[9] (bit 47 on Gen8)
> +*  - Src0.AddrImm[9] (bit 95 on Gen8)
> +*/
> +   if (brw->gen >= 8)
> +  return brw_inst_bits(src, 95, 95) ||
> + brw_inst_bits(src, 47, 47) ||
> + brw_inst_bits(src, 11, 11) ||
> + brw_inst_bits(src, 7,  7);

11 (NibCtrl) is the only one that isn't reserved. I'd rather assert
that reserved bits are not set.

> +   else
> +  return brw_inst_bits(src, 95, 91) ||
> + brw_inst_bits(src, 47, 47) ||
> + brw_inst_bits(src, 7,  7) ||
> + (brw->gen < 7 && brw_inst_bits(src, 90, 90));

Same thing, 47 (NibCtrl) is the only non-reserved bit. I'd rather
assert about the others.

> +}
> +
> +static bool
> +has_3src_unmapped_bits(struct brw_context *brw, brw_inst *src)
> +{
> +   /* Check for three-source instruction bits that don't map to any of the
> +* fields of the compacted instruction.  All of them seem to be reserved
> +* bits currently.
> +*/
> +   assert(brw->gen >= 8);
> +   if (brw->gen >= 9 || brw->is_cherryview)
> +  return brw_inst_bits(src, 127, 127) ||
> + brw_inst_bits(src, 105, 105) ||

105 is Src1's SubRegNum[word] on CHV and is included in SourceIndex.

> + brw_inst_bits(src, 7,  7);
> +   else
> +  return brw_inst_bits(src, 127, 126) ||
> + brw_inst_bits(src, 105, 105) ||
> + brw_inst_bits(src, 84, 84) ||
> + brw_inst_bits(src, 36, 35) ||
> + brw_inst_bits(src, 7,  7);

Since bit 7 is reserved in all cases, we might just
assert(brw_inst_bits(src, 7,  7) == 0) in
brw_try_compact_instruction(). I don't have a preference.

So, I think it'd be nice to differentiate between bits that aren't
included in compact instructions and reserved bits.

(Sorry, I could have given you this feedback in the first time around)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCHv2 13/14] i965/gen7-8: Set up early depth/stencil control appropriately for image load/store.

2015-02-09 Thread Francisco Jerez
v2: Store early fragment test mode in brw_wm_prog_data instead of
getting it from core mesa data structures (Ken).
---
 src/mesa/drivers/dri/i965/brw_context.h  | 1 +
 src/mesa/drivers/dri/i965/brw_defines.h  | 3 +++
 src/mesa/drivers/dri/i965/brw_wm.c   | 2 ++
 src/mesa/drivers/dri/i965/gen7_wm_state.c| 6 ++
 src/mesa/drivers/dri/i965/gen8_depth_state.c | 6 +++---
 src/mesa/drivers/dri/i965/gen8_ps_state.c| 6 ++
 6 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index e9ef678..1dfd3e6 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -445,6 +445,7 @@ struct brw_wm_prog_data {
 
uint8_t computed_depth_mode;
 
+   bool early_fragment_tests;
bool no_8;
bool dual_src_blend;
bool uses_pos_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index d4648ee..f36f677 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2220,6 +2220,9 @@ enum brw_wm_barycentric_interp_mode {
 # define GEN7_WM_KILL_ENABLE   (1 << 25)
 # define GEN7_WM_COMPUTED_DEPTH_MODE_SHIFT  23
 # define GEN7_WM_USES_SOURCE_DEPTH (1 << 20)
+# define GEN7_WM_EARLY_DS_CONTROL_NORMAL(0 << 21)
+# define GEN7_WM_EARLY_DS_CONTROL_PSEXEC(1 << 21)
+# define GEN7_WM_EARLY_DS_CONTROL_PREPS (2 << 21)
 # define GEN7_WM_USES_SOURCE_W (1 << 19)
 # define GEN7_WM_POSITION_ZW_PIXEL (0 << 17)
 # define GEN7_WM_POSITION_ZW_CENTROID  (2 << 17)
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
b/src/mesa/drivers/dri/i965/brw_wm.c
index d2b032f..0513190 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -182,6 +182,8 @@ bool do_wm_prog(struct brw_context *brw,
 
prog_data.computed_depth_mode = computed_depth_mode(&fp->program);
 
+   prog_data.early_fragment_tests = fs && fs->EarlyFragmentTests;
+
/* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
if (!prog)
   prog_data.base.use_alt_mode = true;
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index 60d58c5..6087968 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -102,6 +102,12 @@ upload_wm_state(struct brw_context *brw)
   dw1 |= GEN7_WM_USES_INPUT_COVERAGE_MASK;
}
 
+   /* BRW_NEW_FS_PROG_DATA */
+   if (prog_data->early_fragment_tests)
+  dw1 |= GEN7_WM_EARLY_DS_CONTROL_PREPS;
+   else if (brw->wm.base.nr_image_params)
+  dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC;
+
/* _NEW_BUFFERS | _NEW_COLOR */
if (brw->is_haswell &&
!(brw_color_buffer_write_enabled(brw) || writes_depth) &&
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c 
b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index e428089..7250e28 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -241,10 +241,10 @@ pma_fix_enable(const struct brw_context *brw)
 */
const bool hiz_enabled = depth_irb && intel_renderbuffer_has_hiz(depth_irb);
 
-   /* 3DSTATE_WM::Early Depth/Stencil Control != EDSC_PREPS (2).
-* We always leave this set to EDSC_NORMAL (0).
+   /* BRW_NEW_FS_PROG_DATA:
+* 3DSTATE_WM::Early Depth/Stencil Control != EDSC_PREPS (2).
 */
-   const bool edsc_not_preps = true;
+   const bool edsc_not_preps = !brw->wm.prog_data->early_fragment_tests;
 
/* 3DSTATE_PS_EXTRA::PixelShaderValid is always true. */
const bool pixel_shader_valid = true;
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c 
b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index b932437..fdab9a3 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -105,6 +105,12 @@ upload_wm_state(struct brw_context *brw)
dw1 |= brw->wm.prog_data->barycentric_interp_modes <<
   GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
 
+   /* BRW_NEW_FS_PROG_DATA */
+   if (brw->wm.prog_data->early_fragment_tests)
+  dw1 |= GEN7_WM_EARLY_DS_CONTROL_PREPS;
+   else if (brw->wm.base.nr_image_params)
+  dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC;
+
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_WM << 16 | (2 - 2));
OUT_BATCH(dw1);
-- 
2.1.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/32] i965/fs: Fix stack allocation of fs_inst and stop stealing src array provided on construction.

2015-02-09 Thread Matt Turner
Sounds well reasoned to me.

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/util: Define ffsll on OpenBSD.

2015-02-09 Thread Jose Fonseca

On 09/02/15 16:59, Jon TURNEY wrote:

On 06/02/2015 19:58, Matt Turner wrote:

On Fri, Feb 6, 2015 at 3:38 AM, Jonathan Gray  wrote:

OpenBSD has ffs in libc but does not have ffsll so use the compiler
builtin.  PIPE_OS_BSD isn't suitable here as FreeBSD has ffsll in libc.

Signed-off-by: Jonathan Gray 
---
  src/gallium/auxiliary/util/u_math.h | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_math.h
b/src/gallium/auxiliary/util/u_math.h
index 5db5b66..ec282f3 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -531,6 +531,8 @@ unsigned ffs( unsigned u )
  #elif defined(__MINGW32__) || defined(PIPE_OS_ANDROID)
  #define ffs __builtin_ffs
  #define ffsll __builtin_ffsll
+#elif defined(__OpenBSD__)
+#define ffsll __builtin_ffsll
  #endif


Autoconf checks for presence of a bunch of builtins. Please use those
instead (in this case, HAVE___BUILTIN_FFSLL).


Yes, please.

This has just been 'fixed' for MinGW, now for OpenBSD, and also needs
fixing for Cygwin.




Attached is a patch which attempts to do this using autoconf checks.


The issue is that this will break scons builds unless these checks are 
replicated there.  And SCons implementation of configure checks are not 
great to be honest -- they either are cached (but in such way were 
multiple builds from same source tree pick up wrong values) or they need 
to be re-checked on every build (wasting time for incremental builds).


This is why, within reason, I personally like to avoid configure checks 
when practical.



So for now I'd prefer to leave MinGW 'fixed' as you put it.

But fell free to fix the other platforms as you propose.


BTW, isn't there any standard include that defines ffsll as macro or 
inline on top of __builtin_ffsll for systems that support it?



Jose

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/32] i965: Factor out virtual GRF allocation to a separate object.

2015-02-09 Thread Matt Turner
On Fri, Feb 6, 2015 at 2:40 PM, Matt Turner  wrote:
> 8 - Sent a question
> 9 - Like mine better?
> 10 - Looks wrong to me
> 11-13 - Asked Jason to review
> 14 - Asked for an example showing the problem
> 15-16 - R-b
> 17-18 - R-b - but wow, did we really hit these in practice?
> 19-20 - R-b
> 21 - Sent a question
> 22-25 - R-b

8, 9, and 21 are R-b
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCHv2 32/32] i965: Don't compact instructions with unmapped bits.

2015-02-09 Thread Francisco Jerez
Matt Turner  writes:

> On Mon, Feb 9, 2015 at 6:08 AM, Francisco Jerez  wrote:
>> Some instruction bits don't have a mapping defined to any compacted
>> instruction field.  If they're ever set and we end up compacting the
>> instruction they will be forced to zero.  Avoid using compaction in such
>> cases.
>>
>> v2: Align multiple lines of an expression to the same column.  Change
>> conditional compaction of 3-source instructions to an
>> assertion. (Matt)
>> ---
>>  src/mesa/drivers/dri/i965/brw_eu_compact.c | 46 
>> ++
>>  1 file changed, 46 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
>> b/src/mesa/drivers/dri/i965/brw_eu_compact.c
>> index 8e33bcb..f80bcc1 100644
>> --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
>> +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
>> @@ -843,10 +843,53 @@ set_3src_source_index(struct brw_context *brw, 
>> brw_compact_inst *dst, brw_inst *
>>  }
>>
>>  static bool
>> +has_unmapped_bits(struct brw_context *brw, brw_inst *src)
>> +{
>> +   /* Check for instruction bits that don't map to any of the fields of the
>> +* compacted instruction.  The instruction cannot be compacted if any of
>> +* them are set.  They overlap with:
>> +*  - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
>> +*  - Dst.AddrImm[9] (bit 47 on Gen8)
>> +*  - Src0.AddrImm[9] (bit 95 on Gen8)
>> +*/
>> +   if (brw->gen >= 8)
>> +  return brw_inst_bits(src, 95, 95) ||
>> + brw_inst_bits(src, 47, 47) ||
>> + brw_inst_bits(src, 11, 11) ||
>> + brw_inst_bits(src, 7,  7);
>
> 11 (NibCtrl) is the only one that isn't reserved. I'd rather assert
> that reserved bits are not set.
>
No, bits 47 and 95 are valid too and part of the AddrImm fields.

>> +   else
>> +  return brw_inst_bits(src, 95, 91) ||
>> + brw_inst_bits(src, 47, 47) ||
>> + brw_inst_bits(src, 7,  7) ||
>> + (brw->gen < 7 && brw_inst_bits(src, 90, 90));
>
> Same thing, 47 (NibCtrl) is the only non-reserved bit. I'd rather
> assert about the others.
>
>> +}
>> +
>> +static bool
>> +has_3src_unmapped_bits(struct brw_context *brw, brw_inst *src)
>> +{
>> +   /* Check for three-source instruction bits that don't map to any of the
>> +* fields of the compacted instruction.  All of them seem to be reserved
>> +* bits currently.
>> +*/
>> +   assert(brw->gen >= 8);
>> +   if (brw->gen >= 9 || brw->is_cherryview)
>> +  return brw_inst_bits(src, 127, 127) ||
>> + brw_inst_bits(src, 105, 105) ||
>
> 105 is Src1's SubRegNum[word] on CHV and is included in SourceIndex.
>
Ahh, that's true, good catch.

>> + brw_inst_bits(src, 7,  7);
>> +   else
>> +  return brw_inst_bits(src, 127, 126) ||
>> + brw_inst_bits(src, 105, 105) ||
>> + brw_inst_bits(src, 84, 84) ||
>> + brw_inst_bits(src, 36, 35) ||
>> + brw_inst_bits(src, 7,  7);
>
> Since bit 7 is reserved in all cases, we might just
> assert(brw_inst_bits(src, 7,  7) == 0) in
> brw_try_compact_instruction(). I don't have a preference.
>
> So, I think it'd be nice to differentiate between bits that aren't
> included in compact instructions and reserved bits.
>
> (Sorry, I could have given you this feedback in the first time around)


pgp3QKojZ224p.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 11/12] nir: Add a global code motion (GCM) pass

2015-02-09 Thread Connor Abbott
So last time you posted this, I had some suggestions but I realized
that in light of the fact that we want to do value numbering, it
didn't seem like my suggestions were any good. But now that I've
thought about it a little bit, it seems to me like a better plan might
be to pull instructions out of their basic blocks and into the global
list of instructions earlier. That is, instead of marking instructions
as pinned in gcm_pin_instructions_block(), put the ones that aren't
pinned in state->instrs and then instead of walking over all the basic
blocks and bailing on pinned instructions when scheduling early and
late, just walk over state->instrs. This should be faster, and when we
do value-numbering there's a natural place to do it, i.e. after
pulling instructions but before scheduling -- this means we only
value-number the non-pinned instructions, which is what we want since
we're not coalescing the pinned instructions in this pass. This is
also more similar to how it's presented in the paper (although our
scheduling algorithm will still be different I guess).

On Mon, Feb 9, 2015 at 3:19 AM, Jason Ekstrand  wrote:
> v2 Jason Ekstrand :
>  - Use nir_dominance_lca for computing least common anscestors
>  - Use the block index for comparing dominance tree depths
>  - Pin things that do partial derivatives
> ---
>  src/glsl/Makefile.sources  |   1 +
>  src/glsl/nir/nir.h |   2 +
>  src/glsl/nir/nir_opt_gcm.c | 501 
> +
>  3 files changed, 504 insertions(+)
>  create mode 100644 src/glsl/nir/nir_opt_gcm.c
>
> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> index a580b6e..69cb2e6 100644
> --- a/src/glsl/Makefile.sources
> +++ b/src/glsl/Makefile.sources
> @@ -43,6 +43,7 @@ NIR_FILES = \
> nir/nir_opt_copy_propagate.c \
> nir/nir_opt_cse.c \
> nir/nir_opt_dce.c \
> +   nir/nir_opt_gcm.c \
> nir/nir_opt_global_to_local.c \
> nir/nir_opt_peephole_select.c \
> nir/nir_opt_remove_phis.c \
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index 90a7001..55fb43d 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -1589,6 +1589,8 @@ bool nir_opt_cse(nir_shader *shader);
>  bool nir_opt_dce_impl(nir_function_impl *impl);
>  bool nir_opt_dce(nir_shader *shader);
>
> +void nir_opt_gcm(nir_shader *shader);
> +
>  bool nir_opt_peephole_select(nir_shader *shader);
>  bool nir_opt_peephole_ffma(nir_shader *shader);
>
> diff --git a/src/glsl/nir/nir_opt_gcm.c b/src/glsl/nir/nir_opt_gcm.c
> new file mode 100644
> index 000..d48518b
> --- /dev/null
> +++ b/src/glsl/nir/nir_opt_gcm.c
> @@ -0,0 +1,501 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *Jason Ekstrand (ja...@jlekstrand.net)
> + *
> + */
> +
> +#include "nir.h"
> +
> +/*
> + * Implements Global Code Motion.  A description of GCM can be found in
> + * "Global Code Motion; Global Value Numbering" by Cliff Click.
> + * Unfortunately, the algorithm presented in the paper is broken in a
> + * number of ways.  The algorithm used here differs substantially from the
> + * one in the paper but it is, in my opinion, much easier to read and
> + * verify correcness.
> + */
> +
> +struct gcm_block_info {
> +   /* Number of loops this block is inside */
> +   unsigned loop_depth;
> +
> +   /* The last instruction inserted into this block.  This is used as we
> +* traverse the instructions and insert them back into the program to
> +* put them in the right order.
> +*/
> +   nir_instr *last_instr;
> +};
> +
> +struct gcm_state {
> +   nir_function_impl *impl;
> +   nir_instr *instr;
> +
> +   /* Marks all instructions that have been visited by the curren pass */
> +   BITSET_WORD *visited;
> +
> +   /* Marks instructions that are "pinned", i.e. can

[Mesa-dev] [PATCHv3 32/32] i965: Don't compact instructions with unmapped bits.

2015-02-09 Thread Francisco Jerez
Some instruction bits don't have a mapping defined to any compacted
instruction field.  If they're ever set and we end up compacting the
instruction they will be forced to zero.  Avoid using compaction in such
cases.

v2: Align multiple lines of an expression to the same column.  Change
conditional compaction of 3-source instructions to an
assertion. (Matt)
v3: The 3-source instruction bit 105 is part of SourceIndex on CHV.
Add assertion that reserved bit 7 is not set. (Matt)
Document overlap with UIP and 64-bit immediate fields.
---
 src/mesa/drivers/dri/i965/brw_eu_compact.c | 48 ++
 1 file changed, 48 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index 8e33bcb..cb93636 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -843,10 +843,54 @@ set_3src_source_index(struct brw_context *brw, 
brw_compact_inst *dst, brw_inst *
 }
 
 static bool
+has_unmapped_bits(struct brw_context *brw, brw_inst *src)
+{
+   /* Check for instruction bits that don't map to any of the fields of the
+* compacted instruction.  The instruction cannot be compacted if any of
+* them are set.  They overlap with:
+*  - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
+*  - Dst.AddrImm[9] (bit 47 on Gen8)
+*  - Src0.AddrImm[9] (bit 95 on Gen8)
+*  - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8)
+*  - UIP[31] (bit 95 on Gen8)
+*/
+   if (brw->gen >= 8)
+  return brw_inst_bits(src, 95, 95) ||
+ brw_inst_bits(src, 47, 47) ||
+ brw_inst_bits(src, 11, 11) ||
+ brw_inst_bits(src, 7,  7);
+   else
+  return brw_inst_bits(src, 95, 91) ||
+ brw_inst_bits(src, 47, 47) ||
+ brw_inst_bits(src, 7,  7) ||
+ (brw->gen < 7 && brw_inst_bits(src, 90, 90));
+}
+
+static bool
+has_3src_unmapped_bits(struct brw_context *brw, brw_inst *src)
+{
+   /* Check for three-source instruction bits that don't map to any of the
+* fields of the compacted instruction.  All of them seem to be reserved
+* bits currently.
+*/
+   assert(brw->gen >= 8);
+   if (brw->gen >= 9 || brw->is_cherryview)
+  return brw_inst_bits(src, 127, 127) ||
+ brw_inst_bits(src, 7,  7);
+   else
+  return brw_inst_bits(src, 127, 126) ||
+ brw_inst_bits(src, 105, 105) ||
+ brw_inst_bits(src, 84, 84) ||
+ brw_inst_bits(src, 36, 35) ||
+ brw_inst_bits(src, 7,  7);
+}
+
+static bool
 brw_try_compact_3src_instruction(struct brw_context *brw, brw_compact_inst 
*dst,
  brw_inst *src)
 {
assert(brw->gen >= 8);
+   assert(!has_3src_unmapped_bits(brw, src));
 
 #define compact(field) \
brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(brw, src))
@@ -914,6 +958,7 @@ brw_try_compact_instruction(struct brw_context *brw, 
brw_compact_inst *dst,
brw_compact_inst temp;
 
assert(brw_inst_cmpt_control(brw, src) == 0);
+   assert(brw_inst_bits(src, 7,  7) == 0); /* reserved bit */
 
if (is_3src(brw_inst_opcode(brw, src))) {
   if (brw->gen >= 8) {
@@ -937,6 +982,9 @@ brw_try_compact_instruction(struct brw_context *brw, 
brw_compact_inst *dst,
   return false;
}
 
+   if (has_unmapped_bits(brw, src))
+  return false;
+
memset(&temp, 0, sizeof(temp));
 
brw_compact_inst_set_opcode(&temp, brw_inst_opcode(brw, src));
-- 
2.1.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] mesa-10.4.4: BROKEN TLS support in GLX with llvm-toolchain v3.6.0rc2

2015-02-09 Thread Jose Fonseca

On 09/02/15 17:44, Emil Velikov wrote:

Hi Sedat,

On 07/02/15 22:42, Sedat Dilek wrote:

[ Please CC me I am not subscribed to mesa-dev and llvmdev MLs ]

Hi,

I already reported this when playing 1st time with my llvm-toolchain
v3.6.0rc2 and mesa v10.3.7 [1].
The issue still remains in mesa v10.4.4.

So, this is a field test to see if LLVM/Clang v3.6.0rc2 fits my needs.

I see the following build-error...
...

make[4]: Entering directory `/home/wearefam/src/mesa/mesa-git/src/mapi'
   CC shared_glapi_libglapi_la-entry.lo
clang version 3.6.0 (tags/RELEASE_360/rc2)
Target: x86_64-unknown-linux-gnu
Thread model: posix
Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.6
Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.6.4
Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9
Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9.2
Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9
Candidate multilib: .;@m64
Candidate multilib: 32;@m32
Selected multilib: .;@m64
  "/opt/llvm-toolchain-3.6.0rc2/bin/clang" -cc1 -triple
x86_64-unknown-linux-gnu -emit-obj -mrelax-all -disable-free
-main-file-name entry.c -mrelocation-model static -mthread-model posix
-mdisable-fp-elim -relaxed-aliasing -fmath-errno -masm-verbose
-mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu
x86-64 -target-linker-version 2.22 -v -g -dwarf-column-info
-coverage-file /home/wearefam/src/mesa/mesa-git/src/mapi/entry.c
-resource-dir /opt/llvm-toolchain-3.6.0rc2/bin/../lib/clang/3.6.0
-dependency-file .deps/shared_glapi_libglapi_la-entry.Tpo
-sys-header-deps -MP -MT shared_glapi_libglapi_la-entry.lo -D
"PACKAGE_NAME=\"Mesa\"" -D "PACKAGE_TARNAME=\"mesa\"" -D
"PACKAGE_VERSION=\"10.4.4\"" -D "PACKAGE_STRING=\"Mesa 10.4.4\"" -D
"PACKAGE_BUGREPORT=\"https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_enter-5Fbug.cgi-3Fproduct-3DMesa&d=AwID-g&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=zfmBZnnVGHeYde45pMKNnVyzeaZbdIqVLprmZCM2zzE&m=LdbG9btkWrIw7ABhWDTiHtGwFnB7cDCY6cHlnzXawlQ&s=sH_YjhwwusfBRXm5X-z7LRFkCz68ItpSfCmRnHJAYkw&e=
 \""
-D "PACKAGE_URL=\"\"" -D "PACKAGE=\"mesa\"" -D "VERSION=\"10.4.4\"" -D
STDC_HEADERS=1 -D HAVE_SYS_TYPES_H=1 -D HAVE_SYS_STAT_H=1 -D
HAVE_STDLIB_H=1 -D HAVE_STRING_H=1 -D HAVE_MEMORY_H=1 -D
HAVE_STRINGS_H=1 -D HAVE_INTTYPES_H=1 -D HAVE_STDINT_H=1 -D
HAVE_UNISTD_H=1 -D HAVE_DLFCN_H=1 -D "LT_OBJDIR=\".libs/\"" -D
YYTEXT_POINTER=1 -D HAVE___BUILTIN_BSWAP32=1 -D
HAVE___BUILTIN_BSWAP64=1 -D HAVE___BUILTIN_CLZ=1 -D
HAVE___BUILTIN_CLZLL=1 -D HAVE___BUILTIN_CTZ=1 -D
HAVE___BUILTIN_EXPECT=1 -D HAVE___BUILTIN_FFS=1 -D
HAVE___BUILTIN_FFSLL=1 -D HAVE___BUILTIN_POPCOUNT=1 -D
HAVE___BUILTIN_POPCOUNTLL=1 -D HAVE___BUILTIN_UNREACHABLE=1 -D
HAVE_DLADDR=1 -D HAVE_PTHREAD=1 -D HAVE_LIBEXPAT=1 -D
USE_EXTERNAL_DXTN_LIB=1 -D _GNU_SOURCE -D USE_SSE41 -D DEBUG -D
USE_X86_64_ASM -D HAVE_XLOCALE_H -D HAVE_STRTOF -D HAVE_DLOPEN -D
HAVE_POSIX_MEMALIGN -D HAVE_LIBDRM -D GLX_USE_DRM -D HAVE_LIBUDEV -D
GLX_INDIRECT_RENDERING -D GLX_DIRECT_RENDERING -D GLX_USE_TLS -D
HAVE_ALIAS -D HAVE_MINCORE -D HAVE_LLVM=0x0306 -D LLVM_VERSION_PATCH=0
-D MAPI_MODE_GLAPI -D
"MAPI_ABI_HEADER=\"shared-glapi/glapi_mapi_tmp.h\"" -I . -I
../../include -I ../../src/mapi -I ../../src/mapi -I /opt/xorg/include
-internal-isystem /usr/local/include -internal-isystem
/opt/llvm-toolchain-3.6.0rc2/bin/../lib/clang/3.6.0/include
-internal-externc-isystem /usr/include/x86_64-linux-gnu
-internal-externc-isystem /include -internal-externc-isystem
/usr/include -O0 -Wall -Werror=implicit-function-declaration
-Werror=missing-prototypes -std=c99 -fdebug-compilation-dir
/home/wearefam/src/mesa/mesa-git/src/mapi -ferror-limit 19
-fmessage-length 0 -pthread -mstackrealign -fobjc-runtime=gcc
-fdiagnostics-show-option -o entry.o -x c ../../src/mapi/entry.c
clang -cc1 version 3.6.0 based upon LLVM 3.6.0 default target
x86_64-unknown-linux-gnu
ignoring nonexistent directory "/include"
ignoring duplicate directory "."
ignoring duplicate directory "."
#include "..." search starts here:
#include <...> search starts here:
  .
  ../../include
  /opt/xorg/include
  /usr/local/include
  /opt/llvm-toolchain-3.6.0rc2/bin/../lib/clang/3.6.0/include
  /usr/include/x86_64-linux-gnu
  /usr/include
End of search list.
In file included from ../../src/mapi/entry.c:49:
./entry_x86-64_tls.h:66:1: warning: tentative array definition assumed
to have one element
x86_64_entry_start[];
^
fatal error: error in backend: symbol 'x86_64_entry_start' is already defined
clang: error: clang frontend command failed with exit code 70 (use -v
to see invocation)
clang version 3.6.0 (tags/RELEASE_360/rc2)
Target: x86_64-unknown-linux-gnu
Thread model: posix
clang: note: diagnostic msg: PLEASE submit a bug report to
https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_bugs_&d=AwID-g&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=zfmBZnnVGHeYde45pMKNnVyzeaZbdIqVLprmZCM2zzE&m=LdbG9btkWrIw7ABhWDTiHtGwFnB7cDCY6cHlnzXawlQ&s=

Re: [Mesa-dev] [PATCH] mesa: Bump MAX_IMAGE_UNIFORMS to 32.

2015-02-09 Thread Kenneth Graunke
On Monday, February 09, 2015 06:37:45 PM Francisco Jerez wrote:
> So the i965 driver can expose 32 image uniforms per shader stage.
> ---
>  src/mesa/main/config.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
> index 4ec4b75..08e1a14 100644
> --- a/src/mesa/main/config.h
> +++ b/src/mesa/main/config.h
> @@ -178,7 +178,7 @@
>  #define MAX_COMBINED_ATOMIC_BUFFERS(MAX_UNIFORM_BUFFERS * 6)
>  /* Size of an atomic counter in bytes according to 
> ARB_shader_atomic_counters */
>  #define ATOMIC_COUNTER_SIZE4
> -#define MAX_IMAGE_UNIFORMS 16
> +#define MAX_IMAGE_UNIFORMS 32
>  /* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */
>  #define MAX_IMAGE_UNITS(MAX_IMAGE_UNIFORMS * 6)
>  /*@}*/
> 

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Don't tile 1D miptrees.

2015-02-09 Thread Kenneth Graunke
On Monday, February 09, 2015 07:15:28 PM Francisco Jerez wrote:
> It doesn't really improve locality of texture fetches, quite the
> opposite it's a waste of memory bandwidth and space due to tile
> alignment.
> 
> v2: Check mt->logical_height0 instead of mt->target (Ken).  Add short
> comment explaining why they shouldn't be tiled.
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 64752dd..0e3888f 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -488,6 +488,13 @@ intel_miptree_choose_tiling(struct brw_context *brw,
> base_format == GL_DEPTH_STENCIL_EXT)
>return I915_TILING_Y;
>  
> +   /* 1D textures (and 1D array textures) don't get any benefit from tiling,
> +* in fact it leads to a less efficient use of memory space and bandwidth
> +* due to tile alignment.
> +*/
> +   if (mt->logical_height0 == 1)
> +  return I915_TILING_NONE;
> +
> int minimum_pitch = mt->total_width * mt->cpp;
>  
> /* If the width is much smaller than a tile, don't bother tiling. */
> 

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/skl: Implement WaDisable1DDepthStencil

2015-02-09 Thread Kenneth Graunke
On Monday, February 09, 2015 05:57:20 PM Neil Roberts wrote:
> Skylake+ doesn't support setting a depth buffer to a 1D surface but it
> does allow pretending it's a 2D texture with a height of 1 instead.
> 
> This fixes the GL_DEPTH_COMPONENT_* tests of the copyteximage piglit
> test (and also seems to avoid a subsequent GPU hang).
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89037
> ---
> This probably only makes sense on top of Francisco Jerez's patch here:
> 
> http://lists.freedesktop.org/archives/mesa-dev/2015-February/076392.html
> 
> The copyteximage test is still failing with some other formats such as
> GL_RGB16 and some intensity and luminance formats but I'm just looking
> into that now.
> 
>  src/mesa/drivers/dri/i965/gen8_depth_state.c | 12 
>  1 file changed, 12 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c 
> b/src/mesa/drivers/dri/i965/gen8_depth_state.c
> index e428089..b4eb6e1 100644
> --- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
> @@ -190,6 +190,18 @@ gen8_emit_depth_stencil_hiz(struct brw_context *brw,
> case GL_TEXTURE_3D:
>assert(mt);
>depth = MAX2(mt->logical_depth0, 1);
> +  surftype = translate_tex_target(gl_target);
> +  break;
> +   case GL_TEXTURE_1D_ARRAY:
> +   case GL_TEXTURE_1D:
> +  if (brw->gen >= 9) {
> + /* WaDisable1DDepthStencil. Skylake+ doesn't support 1D depth
> +  * textures but it does allow pretending it's a 2D texture
> +  * instead.
> +  */
> + surftype = BRW_SURFACE_2D;
> + break;
> +  }
>/* fallthrough */
> default:
>surftype = translate_tex_target(gl_target);
> 

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCHv3 32/32] i965: Don't compact instructions with unmapped bits.

2015-02-09 Thread Matt Turner
On Mon, Feb 9, 2015 at 12:26 PM, Francisco Jerez  wrote:
> Some instruction bits don't have a mapping defined to any compacted
> instruction field.  If they're ever set and we end up compacting the
> instruction they will be forced to zero.  Avoid using compaction in such
> cases.
>
> v2: Align multiple lines of an expression to the same column.  Change
> conditional compaction of 3-source instructions to an
> assertion. (Matt)
> v3: The 3-source instruction bit 105 is part of SourceIndex on CHV.
> Add assertion that reserved bit 7 is not set. (Matt)
> Document overlap with UIP and 64-bit immediate fields.
> ---
>  src/mesa/drivers/dri/i965/brw_eu_compact.c | 48 
> ++
>  1 file changed, 48 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c 
> b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> index 8e33bcb..cb93636 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
> @@ -843,10 +843,54 @@ set_3src_source_index(struct brw_context *brw, 
> brw_compact_inst *dst, brw_inst *
>  }
>
>  static bool
> +has_unmapped_bits(struct brw_context *brw, brw_inst *src)
> +{
> +   /* Check for instruction bits that don't map to any of the fields of the
> +* compacted instruction.  The instruction cannot be compacted if any of
> +* them are set.  They overlap with:
> +*  - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
> +*  - Dst.AddrImm[9] (bit 47 on Gen8)
> +*  - Src0.AddrImm[9] (bit 95 on Gen8)
> +*  - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8)
> +*  - UIP[31] (bit 95 on Gen8)
> +*/
> +   if (brw->gen >= 8)
> +  return brw_inst_bits(src, 95, 95) ||
> + brw_inst_bits(src, 47, 47) ||
> + brw_inst_bits(src, 11, 11) ||
> + brw_inst_bits(src, 7,  7);
> +   else
> +  return brw_inst_bits(src, 95, 91) ||
> + brw_inst_bits(src, 47, 47) ||
> + brw_inst_bits(src, 7,  7) ||
> + (brw->gen < 7 && brw_inst_bits(src, 90, 90));
> +}
> +
> +static bool
> +has_3src_unmapped_bits(struct brw_context *brw, brw_inst *src)
> +{
> +   /* Check for three-source instruction bits that don't map to any of the
> +* fields of the compacted instruction.  All of them seem to be reserved
> +* bits currently.
> +*/
> +   assert(brw->gen >= 8);
> +   if (brw->gen >= 9 || brw->is_cherryview)
> +  return brw_inst_bits(src, 127, 127) ||
> + brw_inst_bits(src, 7,  7);
> +   else
> +  return brw_inst_bits(src, 127, 126) ||
> + brw_inst_bits(src, 105, 105) ||
> + brw_inst_bits(src, 84, 84) ||
> + brw_inst_bits(src, 36, 35) ||
> + brw_inst_bits(src, 7,  7);

Thanks for adding the assertion for bit 7. Do you think it's valuable
to check it here as well? I wouldn't think it was.

What I'm suggesting is that we only do assertions about reserved bits.
If they're set, we'll trigger the assertion only in debug builds, but
I think that's sufficient. I think we should only check useful bits
here (i.e., non-reserverd bits in the uncompacted instruction that
don't exist in the compacted instruction).

I think effectively that means removing the checks for bit 7, all of
the bits in the 3-src BDW case (always return false), and 90 on gen <
7.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nir: Mark nir_print_instr's instr pointer as const.

2015-02-09 Thread Kenneth Graunke
Printing instructions doesn't modify them, so we can mark the parameter
const.
---
 src/glsl/nir/nir.h   | 2 +-
 src/glsl/nir/nir_print.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 4cb2e92..ceda977 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1489,7 +1489,7 @@ void nir_index_ssa_defs(nir_function_impl *impl);
 void nir_index_blocks(nir_function_impl *impl);
 
 void nir_print_shader(nir_shader *shader, FILE *fp);
-void nir_print_instr(nir_instr *instr, FILE *fp);
+void nir_print_instr(const nir_instr *instr, FILE *fp);
 
 #ifdef DEBUG
 void nir_validate_shader(nir_shader *shader);
diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index 9c07950..6a3c6a0 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -576,7 +576,7 @@ print_parallel_copy_instr(nir_parallel_copy_instr *instr, 
FILE *fp)
 }
 
 static void
-print_instr(nir_instr *instr, print_var_state *state, unsigned tabs, FILE *fp)
+print_instr(const nir_instr *instr, print_var_state *state, unsigned tabs, 
FILE *fp)
 {
print_tabs(tabs, fp);
 
@@ -882,7 +882,7 @@ nir_print_shader(nir_shader *shader, FILE *fp)
 }
 
 void
-nir_print_instr(nir_instr *instr, FILE *fp)
+nir_print_instr(const nir_instr *instr, FILE *fp)
 {
print_instr(instr, NULL, 0, fp);
 }
-- 
2.2.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: Mark nir_print_instr's instr pointer as const.

2015-02-09 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Mon, Feb 9, 2015 at 4:42 PM, Kenneth Graunke 
wrote:

> Printing instructions doesn't modify them, so we can mark the parameter
> const.
> ---
>  src/glsl/nir/nir.h   | 2 +-
>  src/glsl/nir/nir_print.c | 4 ++--
>  2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index 4cb2e92..ceda977 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -1489,7 +1489,7 @@ void nir_index_ssa_defs(nir_function_impl *impl);
>  void nir_index_blocks(nir_function_impl *impl);
>
>  void nir_print_shader(nir_shader *shader, FILE *fp);
> -void nir_print_instr(nir_instr *instr, FILE *fp);
> +void nir_print_instr(const nir_instr *instr, FILE *fp);
>
>  #ifdef DEBUG
>  void nir_validate_shader(nir_shader *shader);
> diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
> index 9c07950..6a3c6a0 100644
> --- a/src/glsl/nir/nir_print.c
> +++ b/src/glsl/nir/nir_print.c
> @@ -576,7 +576,7 @@ print_parallel_copy_instr(nir_parallel_copy_instr
> *instr, FILE *fp)
>  }
>
>  static void
> -print_instr(nir_instr *instr, print_var_state *state, unsigned tabs, FILE
> *fp)
> +print_instr(const nir_instr *instr, print_var_state *state, unsigned
> tabs, FILE *fp)
>  {
> print_tabs(tabs, fp);
>
> @@ -882,7 +882,7 @@ nir_print_shader(nir_shader *shader, FILE *fp)
>  }
>
>  void
> -nir_print_instr(nir_instr *instr, FILE *fp)
> +nir_print_instr(const nir_instr *instr, FILE *fp)
>  {
> print_instr(instr, NULL, 0, fp);
>  }
> --
> 2.2.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: Fix logic error.

2015-02-09 Thread Jason Ekstrand
Sorry I didn't get to this earlier.  It got lost in the backlog.

Reviewed-by: Jason Ekstrand 

Out of curiosity, are there any shader-db results for this?  If not, that's
ok.

--Jason

On Wed, Feb 4, 2015 at 3:19 PM, Matt Turner  wrote:

> ---
> I don't know if this is right, but what we had before was definitely
> wrong. (And gcc warned about it!)
>
>  src/glsl/nir/nir_lower_phis_to_scalar.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c
> b/src/glsl/nir/nir_lower_phis_to_scalar.c
> index 3bb5cc7..7c2f539 100644
> --- a/src/glsl/nir/nir_lower_phis_to_scalar.c
> +++ b/src/glsl/nir/nir_lower_phis_to_scalar.c
> @@ -65,9 +65,9 @@ is_phi_src_scalarizable(nir_phi_src *src,
> * are ok too.
> */
>return nir_op_infos[src_alu->op].output_size == 0 ||
> - src_alu->op != nir_op_vec2 ||
> - src_alu->op != nir_op_vec3 ||
> - src_alu->op != nir_op_vec4;
> + (src_alu->op != nir_op_vec2 &&
> +  src_alu->op != nir_op_vec3 &&
> +  src_alu->op != nir_op_vec4);
> }
>
> case nir_instr_type_phi:
> --
> 2.0.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: Fix logic error.

2015-02-09 Thread Matt Turner
On Mon, Feb 9, 2015 at 1:45 PM, Jason Ekstrand  wrote:
> Sorry I didn't get to this earlier.  It got lost in the backlog.
>
> Reviewed-by: Jason Ekstrand 
>
> Out of curiosity, are there any shader-db results for this?  If not, that's
> ok.

I didn't even know if this the correct fix, so no shader-db numbers. :)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 11/12] nir: Add a global code motion (GCM) pass

2015-02-09 Thread Jason Ekstrand
On Mon, Feb 9, 2015 at 3:01 PM, Connor Abbott  wrote:

> So last time you posted this, I had some suggestions but I realized
> that in light of the fact that we want to do value numbering, it
> didn't seem like my suggestions were any good. But now that I've
> thought about it a little bit, it seems to me like a better plan might
> be to pull instructions out of their basic blocks and into the global
> list of instructions earlier. That is, instead of marking instructions
> as pinned in gcm_pin_instructions_block(), put the ones that aren't
> pinned in state->instrs and then instead of walking over all the basic
> blocks and bailing on pinned instructions when scheduling early and
> late, just walk over state->instrs. This should be faster, and when we
> do value-numbering there's a natural place to do it, i.e. after
> pulling instructions but before scheduling -- this means we only
> value-number the non-pinned instructions, which is what we want since
> we're not coalescing the pinned instructions in this pass. This is
> also more similar to how it's presented in the paper (although our
> scheduling algorithm will still be different I guess).
>
> On Mon, Feb 9, 2015 at 3:19 AM, Jason Ekstrand 
> wrote:
> > v2 Jason Ekstrand :
> >  - Use nir_dominance_lca for computing least common anscestors
> >  - Use the block index for comparing dominance tree depths
> >  - Pin things that do partial derivatives
> > ---
> >  src/glsl/Makefile.sources  |   1 +
> >  src/glsl/nir/nir.h |   2 +
> >  src/glsl/nir/nir_opt_gcm.c | 501
> +
> >  3 files changed, 504 insertions(+)
> >  create mode 100644 src/glsl/nir/nir_opt_gcm.c
> >
> > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> > index a580b6e..69cb2e6 100644
> > --- a/src/glsl/Makefile.sources
> > +++ b/src/glsl/Makefile.sources
> > @@ -43,6 +43,7 @@ NIR_FILES = \
> > nir/nir_opt_copy_propagate.c \
> > nir/nir_opt_cse.c \
> > nir/nir_opt_dce.c \
> > +   nir/nir_opt_gcm.c \
> > nir/nir_opt_global_to_local.c \
> > nir/nir_opt_peephole_select.c \
> > nir/nir_opt_remove_phis.c \
> > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> > index 90a7001..55fb43d 100644
> > --- a/src/glsl/nir/nir.h
> > +++ b/src/glsl/nir/nir.h
> > @@ -1589,6 +1589,8 @@ bool nir_opt_cse(nir_shader *shader);
> >  bool nir_opt_dce_impl(nir_function_impl *impl);
> >  bool nir_opt_dce(nir_shader *shader);
> >
> > +void nir_opt_gcm(nir_shader *shader);
> > +
> >  bool nir_opt_peephole_select(nir_shader *shader);
> >  bool nir_opt_peephole_ffma(nir_shader *shader);
> >
> > diff --git a/src/glsl/nir/nir_opt_gcm.c b/src/glsl/nir/nir_opt_gcm.c
> > new file mode 100644
> > index 000..d48518b
> > --- /dev/null
> > +++ b/src/glsl/nir/nir_opt_gcm.c
> > @@ -0,0 +1,501 @@
> > +/*
> > + * Copyright © 2014 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person
> obtaining a
> > + * copy of this software and associated documentation files (the
> "Software"),
> > + * to deal in the Software without restriction, including without
> limitation
> > + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the
> next
> > + * paragraph) shall be included in all copies or substantial portions
> of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> > + * IN THE SOFTWARE.
> > + *
> > + * Authors:
> > + *Jason Ekstrand (ja...@jlekstrand.net)
> > + *
> > + */
> > +
> > +#include "nir.h"
> > +
> > +/*
> > + * Implements Global Code Motion.  A description of GCM can be found in
> > + * "Global Code Motion; Global Value Numbering" by Cliff Click.
> > + * Unfortunately, the algorithm presented in the paper is broken in a
> > + * number of ways.  The algorithm used here differs substantially from
> the
> > + * one in the paper but it is, in my opinion, much easier to read and
> > + * verify correcness.
> > + */
> > +
> > +struct gcm_block_info {
> > +   /* Number of loops this block is inside */
> > +   unsigned loop_depth;
> > +
> > +   /* The last instruction inserted into this block.  This is used as we
> > +* traverse the instructions and insert them back into the program to
> > +* put them in the right order.
> > + 

Re: [Mesa-dev] [PATCH] nir: Fix logic error.

2015-02-09 Thread Jason Ekstrand
On Mon, Feb 9, 2015 at 4:54 PM, Matt Turner  wrote:

> On Mon, Feb 9, 2015 at 1:45 PM, Jason Ekstrand 
> wrote:
> > Sorry I didn't get to this earlier.  It got lost in the backlog.
> >
> > Reviewed-by: Jason Ekstrand 
> >
> > Out of curiosity, are there any shader-db results for this?  If not,
> that's
> > ok.
>
> I didn't even know if this the correct fix, so no shader-db numbers. :)
>


Wow... I need to learn to read... I think we actually want || and ==.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: Fix logic error.

2015-02-09 Thread Matt Turner
On Mon, Feb 9, 2015 at 2:06 PM, Jason Ekstrand  wrote:
> Wow... I need to learn to read... I think we actually want || and ==.

Feel free to take it over.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 89043] undefined symbol: _glapi_tls_Dispatch

2015-02-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=89043

--- Comment #1 from Aditya Atluri  ---
Hi,
I re-installed Ubuntu 14.04.1 and did not check "install updates" during
installation. 

Install and Upgrade all the packages.


sudo apt-get install freeglut3-dev build-essential libx11-dev libxmu-dev
libxi-dev libgl1-mesa-glx libglu1-mesa libglu1-mesa-dev

clone the mesa repo.

sudo apt-get install bison flex python-mako x11proto-dri3-dev 
x11proto-present-dev libudev-dev libexpat1-dev expat llvm

build it.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 89043] undefined symbol: _glapi_tls_Dispatch

2015-02-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=89043

Aditya Atluri  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |NOTABUG

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/10] i965/fs: Combine constants and unconditionally emit MADs

2015-02-09 Thread Matt Turner
On Mon, Feb 9, 2015 at 7:05 AM, Eero Tamminen  wrote:
> Hi,
>
> On 02/05/2015 06:21 AM, Matt Turner wrote:
>>
>> total instructions in shared programs: 5895414 -> 5747578 (-2.51%)
>> instructions in affected programs: 3618111 -> 3470275 (-4.09%)
>> helped:20492
>> HURT:  4449
>> GAINED:54
>> LOST:  146
>>
>> and with NIR, that already emits MADs always:
>>
>> total instructions in shared programs: 7992936 -> 7772474 (-2.76%)
>> instructions in affected programs: 3738730 -> 3518268 (-5.90%)
>> helped:22082
>> HURT:  3445
>> GAINED:70
>> LOST:  78
>>
>> There are some particularly exciting individual improvements too:
>>
>> kerbal-space-program/667: FS SIMD8: 1024 -> 561 (-45.21%)
>> kerbal-space-program/676: FS SIMD8: 1021 -> 558 (-45.35%)
>
>
> From how old Kerbal version these shaders are from?

They were added to shader-db in November, so that would be my guess.

> I just noticed that recent Kerbal outputs large amount of register spill &
> alloc failure messages with INTEL_DEBUG=perf, whereas output I've saved from
> a last year Kerbal version, had much less of them.

I don't have anything to offer about those changes, but Kerbal uses
large arrays of immediate values that aren't marked as const and that
they variably index. Ken and I talked about it, and making those
uniforms would make out code significantly better.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 89043] undefined symbol: _glapi_tls_Dispatch

2015-02-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=89043

Aditya Atluri  changed:

   What|Removed |Added

 Status|RESOLVED|REOPENED
 Resolution|NOTABUG |---

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 89043] undefined symbol: _glapi_tls_Dispatch

2015-02-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=89043

--- Comment #2 from Aditya Atluri  ---
It came back!

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nir/lower_phis_to_scalar: Fix some logic in is_phi_scalarizable

2015-02-09 Thread Jason Ekstrand
---
 src/glsl/nir/nir_lower_phis_to_scalar.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c 
b/src/glsl/nir/nir_lower_phis_to_scalar.c
index 3bb5cc7..7cd93ea 100644
--- a/src/glsl/nir/nir_lower_phis_to_scalar.c
+++ b/src/glsl/nir/nir_lower_phis_to_scalar.c
@@ -65,9 +65,9 @@ is_phi_src_scalarizable(nir_phi_src *src,
* are ok too.
*/
   return nir_op_infos[src_alu->op].output_size == 0 ||
- src_alu->op != nir_op_vec2 ||
- src_alu->op != nir_op_vec3 ||
- src_alu->op != nir_op_vec4;
+ src_alu->op == nir_op_vec2 ||
+ src_alu->op == nir_op_vec3 ||
+ src_alu->op == nir_op_vec4;
}
 
case nir_instr_type_phi:
-- 
2.2.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 11/12] nir: Add a global code motion (GCM) pass

2015-02-09 Thread Connor Abbott
On Mon, Feb 9, 2015 at 5:04 PM, Jason Ekstrand  wrote:
>
>
> On Mon, Feb 9, 2015 at 3:01 PM, Connor Abbott  wrote:
>>
>> So last time you posted this, I had some suggestions but I realized
>> that in light of the fact that we want to do value numbering, it
>> didn't seem like my suggestions were any good. But now that I've
>> thought about it a little bit, it seems to me like a better plan might
>> be to pull instructions out of their basic blocks and into the global
>> list of instructions earlier. That is, instead of marking instructions
>> as pinned in gcm_pin_instructions_block(), put the ones that aren't
>> pinned in state->instrs and then instead of walking over all the basic
>> blocks and bailing on pinned instructions when scheduling early and
>> late, just walk over state->instrs. This should be faster, and when we
>> do value-numbering there's a natural place to do it, i.e. after
>> pulling instructions but before scheduling -- this means we only
>> value-number the non-pinned instructions, which is what we want since
>> we're not coalescing the pinned instructions in this pass. This is
>> also more similar to how it's presented in the paper (although our
>> scheduling algorithm will still be different I guess).
>>
>> On Mon, Feb 9, 2015 at 3:19 AM, Jason Ekstrand 
>> wrote:
>> > v2 Jason Ekstrand :
>> >  - Use nir_dominance_lca for computing least common anscestors
>> >  - Use the block index for comparing dominance tree depths
>> >  - Pin things that do partial derivatives
>> > ---
>> >  src/glsl/Makefile.sources  |   1 +
>> >  src/glsl/nir/nir.h |   2 +
>> >  src/glsl/nir/nir_opt_gcm.c | 501
>> > +
>> >  3 files changed, 504 insertions(+)
>> >  create mode 100644 src/glsl/nir/nir_opt_gcm.c
>> >
>> > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
>> > index a580b6e..69cb2e6 100644
>> > --- a/src/glsl/Makefile.sources
>> > +++ b/src/glsl/Makefile.sources
>> > @@ -43,6 +43,7 @@ NIR_FILES = \
>> > nir/nir_opt_copy_propagate.c \
>> > nir/nir_opt_cse.c \
>> > nir/nir_opt_dce.c \
>> > +   nir/nir_opt_gcm.c \
>> > nir/nir_opt_global_to_local.c \
>> > nir/nir_opt_peephole_select.c \
>> > nir/nir_opt_remove_phis.c \
>> > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
>> > index 90a7001..55fb43d 100644
>> > --- a/src/glsl/nir/nir.h
>> > +++ b/src/glsl/nir/nir.h
>> > @@ -1589,6 +1589,8 @@ bool nir_opt_cse(nir_shader *shader);
>> >  bool nir_opt_dce_impl(nir_function_impl *impl);
>> >  bool nir_opt_dce(nir_shader *shader);
>> >
>> > +void nir_opt_gcm(nir_shader *shader);
>> > +
>> >  bool nir_opt_peephole_select(nir_shader *shader);
>> >  bool nir_opt_peephole_ffma(nir_shader *shader);
>> >
>> > diff --git a/src/glsl/nir/nir_opt_gcm.c b/src/glsl/nir/nir_opt_gcm.c
>> > new file mode 100644
>> > index 000..d48518b
>> > --- /dev/null
>> > +++ b/src/glsl/nir/nir_opt_gcm.c
>> > @@ -0,0 +1,501 @@
>> > +/*
>> > + * Copyright © 2014 Intel Corporation
>> > + *
>> > + * Permission is hereby granted, free of charge, to any person
>> > obtaining a
>> > + * copy of this software and associated documentation files (the
>> > "Software"),
>> > + * to deal in the Software without restriction, including without
>> > limitation
>> > + * the rights to use, copy, modify, merge, publish, distribute,
>> > sublicense,
>> > + * and/or sell copies of the Software, and to permit persons to whom
>> > the
>> > + * Software is furnished to do so, subject to the following conditions:
>> > + *
>> > + * The above copyright notice and this permission notice (including the
>> > next
>> > + * paragraph) shall be included in all copies or substantial portions
>> > of the
>> > + * Software.
>> > + *
>> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> > EXPRESS OR
>> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> > MERCHANTABILITY,
>> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
>> > SHALL
>> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> > OTHER
>> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> > ARISING
>> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>> > DEALINGS
>> > + * IN THE SOFTWARE.
>> > + *
>> > + * Authors:
>> > + *Jason Ekstrand (ja...@jlekstrand.net)
>> > + *
>> > + */
>> > +
>> > +#include "nir.h"
>> > +
>> > +/*
>> > + * Implements Global Code Motion.  A description of GCM can be found in
>> > + * "Global Code Motion; Global Value Numbering" by Cliff Click.
>> > + * Unfortunately, the algorithm presented in the paper is broken in a
>> > + * number of ways.  The algorithm used here differs substantially from
>> > the
>> > + * one in the paper but it is, in my opinion, much easier to read and
>> > + * verify correcness.
>> > + */
>> > +
>> > +struct gcm_block_info {
>> > +   /* Number of loops this block is inside */
>> > +   unsigned loop_dep

Re: [Mesa-dev] [PATCH v2 11/12] nir: Add a global code motion (GCM) pass

2015-02-09 Thread Jason Ekstrand
On Mon, Feb 9, 2015 at 12:01 PM, Connor Abbott  wrote:

> So last time you posted this, I had some suggestions but I realized
> that in light of the fact that we want to do value numbering, it
> didn't seem like my suggestions were any good. But now that I've
> thought about it a little bit, it seems to me like a better plan might
> be to pull instructions out of their basic blocks and into the global
> list of instructions earlier. That is, instead of marking instructions
> as pinned in gcm_pin_instructions_block(), put the ones that aren't
> pinned in state->instrs and then instead of walking over all the basic
> blocks and bailing on pinned instructions when scheduling early and
> late, just walk over state->instrs. This should be faster, and when we
> do value-numbering there's a natural place to do it, i.e. after
> pulling instructions but before scheduling -- this means we only
> value-number the non-pinned instructions, which is what we want since
> we're not coalescing the pinned instructions in this pass. This is
> also more similar to how it's presented in the paper (although our
> scheduling algorithm will still be different I guess).
>

Sorry for not addressing this comment.  A couple of things:

1) We need the "is this instruction pinned" metadata for other things than
just not iterating over them so we need to stash it somehow anyway.

2) I think we want value numbering to be its own pass.  We just won't run
the validator in between that and GCM.  These passes are tricky and complex
enough that I don't want to make it even more confusing by saying "Oh, now
we value-number".  Obviously, GVN and GCM will have to be kept in sync so
we don't combine two things that can't be code-motioned but that's not a
big deal.

As far as pulling them as we pin and then only ever walking over non-pinned
instructions.  I thought about it..  Then I didn't for some historical
reason but I don't remember what that was anymore.  We could go back and do
that and it would be a bit fasater.  I can do that if you'd like but I
don't think it'll matter much.


>
> On Mon, Feb 9, 2015 at 3:19 AM, Jason Ekstrand 
> wrote:
> > v2 Jason Ekstrand :
> >  - Use nir_dominance_lca for computing least common anscestors
> >  - Use the block index for comparing dominance tree depths
> >  - Pin things that do partial derivatives
> > ---
> >  src/glsl/Makefile.sources  |   1 +
> >  src/glsl/nir/nir.h |   2 +
> >  src/glsl/nir/nir_opt_gcm.c | 501
> +
> >  3 files changed, 504 insertions(+)
> >  create mode 100644 src/glsl/nir/nir_opt_gcm.c
> >
> > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> > index a580b6e..69cb2e6 100644
> > --- a/src/glsl/Makefile.sources
> > +++ b/src/glsl/Makefile.sources
> > @@ -43,6 +43,7 @@ NIR_FILES = \
> > nir/nir_opt_copy_propagate.c \
> > nir/nir_opt_cse.c \
> > nir/nir_opt_dce.c \
> > +   nir/nir_opt_gcm.c \
> > nir/nir_opt_global_to_local.c \
> > nir/nir_opt_peephole_select.c \
> > nir/nir_opt_remove_phis.c \
> > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> > index 90a7001..55fb43d 100644
> > --- a/src/glsl/nir/nir.h
> > +++ b/src/glsl/nir/nir.h
> > @@ -1589,6 +1589,8 @@ bool nir_opt_cse(nir_shader *shader);
> >  bool nir_opt_dce_impl(nir_function_impl *impl);
> >  bool nir_opt_dce(nir_shader *shader);
> >
> > +void nir_opt_gcm(nir_shader *shader);
> > +
> >  bool nir_opt_peephole_select(nir_shader *shader);
> >  bool nir_opt_peephole_ffma(nir_shader *shader);
> >
> > diff --git a/src/glsl/nir/nir_opt_gcm.c b/src/glsl/nir/nir_opt_gcm.c
> > new file mode 100644
> > index 000..d48518b
> > --- /dev/null
> > +++ b/src/glsl/nir/nir_opt_gcm.c
> > @@ -0,0 +1,501 @@
> > +/*
> > + * Copyright © 2014 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person
> obtaining a
> > + * copy of this software and associated documentation files (the
> "Software"),
> > + * to deal in the Software without restriction, including without
> limitation
> > + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the
> next
> > + * paragraph) shall be included in all copies or substantial portions
> of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 

Re: [Mesa-dev] [PATCH v2 11/12] nir: Add a global code motion (GCM) pass

2015-02-09 Thread Connor Abbott
On Mon, Feb 9, 2015 at 6:11 PM, Jason Ekstrand  wrote:
>
>
> On Mon, Feb 9, 2015 at 12:01 PM, Connor Abbott  wrote:
>>
>> So last time you posted this, I had some suggestions but I realized
>> that in light of the fact that we want to do value numbering, it
>> didn't seem like my suggestions were any good. But now that I've
>> thought about it a little bit, it seems to me like a better plan might
>> be to pull instructions out of their basic blocks and into the global
>> list of instructions earlier. That is, instead of marking instructions
>> as pinned in gcm_pin_instructions_block(), put the ones that aren't
>> pinned in state->instrs and then instead of walking over all the basic
>> blocks and bailing on pinned instructions when scheduling early and
>> late, just walk over state->instrs. This should be faster, and when we
>> do value-numbering there's a natural place to do it, i.e. after
>> pulling instructions but before scheduling -- this means we only
>> value-number the non-pinned instructions, which is what we want since
>> we're not coalescing the pinned instructions in this pass. This is
>> also more similar to how it's presented in the paper (although our
>> scheduling algorithm will still be different I guess).
>
>
> Sorry for not addressing this comment.  A couple of things:
>
> 1) We need the "is this instruction pinned" metadata for other things than
> just not iterating over them so we need to stash it somehow anyway.

Sure.

>
> 2) I think we want value numbering to be its own pass.  We just won't run
> the validator in between that and GCM.  These passes are tricky and complex
> enough that I don't want to make it even more confusing by saying "Oh, now
> we value-number".  Obviously, GVN and GCM will have to be kept in sync so we
> don't combine two things that can't be code-motioned but that's not a big
> deal.

The thing is, GVN and GCM are very closely related -- not only can you
not do GVN without GCM afterwards, but as you mentioned you need to
know which instructions are pinned in order to not combine things that
can't be combined. So I think calling GVN as a helper/subroutine as a
part of GCM ("hey, take this list of instructions and merge any
duplicates") might actually be better than keeping them as two
kind-of-separate-but-not-really passes that have to be run in a
specific order and share information -- they're actually *more*
self-contained with the former approach than the latter, since all GVN
needs to do is merge the instructions in the list it's given and we
don't need to run the passes in a specific order without validating
them in-between.

>
> As far as pulling them as we pin and then only ever walking over non-pinned
> instructions.  I thought about it..  Then I didn't for some historical
> reason but I don't remember what that was anymore.  We could go back and do
> that and it would be a bit fasater.  I can do that if you'd like but I don't
> think it'll matter much.

True, it'll probably only be worth it if we do what I mentioned above.

>
>>
>>
>> On Mon, Feb 9, 2015 at 3:19 AM, Jason Ekstrand 
>> wrote:
>> > v2 Jason Ekstrand :
>> >  - Use nir_dominance_lca for computing least common anscestors
>> >  - Use the block index for comparing dominance tree depths
>> >  - Pin things that do partial derivatives
>> > ---
>> >  src/glsl/Makefile.sources  |   1 +
>> >  src/glsl/nir/nir.h |   2 +
>> >  src/glsl/nir/nir_opt_gcm.c | 501
>> > +
>> >  3 files changed, 504 insertions(+)
>> >  create mode 100644 src/glsl/nir/nir_opt_gcm.c
>> >
>> > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
>> > index a580b6e..69cb2e6 100644
>> > --- a/src/glsl/Makefile.sources
>> > +++ b/src/glsl/Makefile.sources
>> > @@ -43,6 +43,7 @@ NIR_FILES = \
>> > nir/nir_opt_copy_propagate.c \
>> > nir/nir_opt_cse.c \
>> > nir/nir_opt_dce.c \
>> > +   nir/nir_opt_gcm.c \
>> > nir/nir_opt_global_to_local.c \
>> > nir/nir_opt_peephole_select.c \
>> > nir/nir_opt_remove_phis.c \
>> > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
>> > index 90a7001..55fb43d 100644
>> > --- a/src/glsl/nir/nir.h
>> > +++ b/src/glsl/nir/nir.h
>> > @@ -1589,6 +1589,8 @@ bool nir_opt_cse(nir_shader *shader);
>> >  bool nir_opt_dce_impl(nir_function_impl *impl);
>> >  bool nir_opt_dce(nir_shader *shader);
>> >
>> > +void nir_opt_gcm(nir_shader *shader);
>> > +
>> >  bool nir_opt_peephole_select(nir_shader *shader);
>> >  bool nir_opt_peephole_ffma(nir_shader *shader);
>> >
>> > diff --git a/src/glsl/nir/nir_opt_gcm.c b/src/glsl/nir/nir_opt_gcm.c
>> > new file mode 100644
>> > index 000..d48518b
>> > --- /dev/null
>> > +++ b/src/glsl/nir/nir_opt_gcm.c
>> > @@ -0,0 +1,501 @@
>> > +/*
>> > + * Copyright © 2014 Intel Corporation
>> > + *
>> > + * Permission is hereby granted, free of charge, to any person
>> > obtaining a
>> > + * copy of this software and associated documentation files (the
>> > "So

[Mesa-dev] [Bug 89043] undefined symbol: _glapi_tls_Dispatch

2015-02-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=89043

--- Comment #3 from Brian Paul  ---
Try reconfiguring with --enable-glx-tls and rebuilding.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/10] nir: Properly clean up CF nodes when we remove them

2015-02-09 Thread Connor Abbott
For some reason, the v2 version of this disappeared from my inbox, so I'll say

Reviewed-by: Connor Abbott 

I'm not convinced this is enough for when we optimize constant
if-statement conditions, but I'll leave that be for now.

On Thu, Feb 5, 2015 at 5:28 PM, Jason Ekstrand  wrote:
> Previously, if you remved a CF node that still had instructions in it, none
> of the use/def information from those instructions would get cleaned up.
> Also, we weren't removing if statements from the if_uses of the
> corresponding register or SSA def.  This commit fixes both of these
> problems
> ---
>  src/glsl/nir/nir.c | 54 
> ++
>  1 file changed, 54 insertions(+)
>
> diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
> index 9a88bd3..8ea7bb5 100644
> --- a/src/glsl/nir/nir.c
> +++ b/src/glsl/nir/nir.c
> @@ -1148,6 +1148,58 @@ stitch_blocks(nir_block *before, nir_block *after)
> exec_node_remove(&after->cf_node.node);
>  }
>
> +static void
> +remove_defs_uses(nir_instr *instr);
> +
> +static void
> +cleanup_cf_node(nir_cf_node *node)
> +{
> +   switch (node->type) {
> +   case nir_cf_node_block: {
> +  nir_block *block = nir_cf_node_as_block(node);
> +  /* We need to walk the instructions and clean up defs/uses */
> +  nir_foreach_instr(block, instr)
> + remove_defs_uses(instr);
> +  break;
> +   }
> +
> +   case nir_cf_node_if: {
> +  nir_if *if_stmt = nir_cf_node_as_if(node);
> +  foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list)
> + cleanup_cf_node(child);
> +  foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list)
> + cleanup_cf_node(child);
> +
> +  struct set *if_uses;
> +  if (if_stmt->condition.is_ssa) {
> + if_uses = if_stmt->condition.ssa->if_uses;
> +  } else {
> + if_uses = if_stmt->condition.reg.reg->if_uses;
> +  }
> +
> +  struct set_entry *entry = _mesa_set_search(if_uses, if_stmt);
> +  assert(entry);
> +  _mesa_set_remove(if_uses, entry);
> +  break;
> +   }
> +
> +   case nir_cf_node_loop: {
> +  nir_loop *loop = nir_cf_node_as_loop(node);
> +  foreach_list_typed(nir_cf_node, child, node, &loop->body)
> + cleanup_cf_node(child);
> +  break;
> +   }
> +   case nir_cf_node_function: {
> +  nir_function_impl *impl = nir_cf_node_as_function(node);
> +  foreach_list_typed(nir_cf_node, child, node, &impl->body)
> + cleanup_cf_node(child);
> +  break;
> +   }
> +   default:
> +  unreachable("Invalid CF node type");
> +   }
> +}
> +
>  void
>  nir_cf_node_remove(nir_cf_node *node)
>  {
> @@ -1175,6 +1227,8 @@ nir_cf_node_remove(nir_cf_node *node)
>exec_node_remove(&node->node);
>stitch_blocks(before_block, after_block);
> }
> +
> +   cleanup_cf_node(node);
>  }
>
>  static bool
> --
> 2.2.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >