Re: [Mesa-dev] [PATCH 02/10] nir: Properly clean up CF nodes when we remove them
On Thu, Feb 5, 2015 at 3:27 PM, Connor Abbott wrote: > While we're cleaning this up, we should go a little farther - we > should also see if anything is using the SSA result of any of these > instructions, and if so make it point to an undef_instr instead to > avoid dangling pointers. This doesn't come up yet but it will when we > start cleaning up constant control flow. > I'm about to send a v2 and I'm leaving this out for now. First, because that's a bit more invasive than I want to do/test right now and second, because I'm not convinced it's needed. For control flow, what we really need to do is assert that there are no phi nodes. As long as that and the dominance property both hold, we won't have any uses of deleted SSA values. > On Thu, Feb 5, 2015 at 5:28 PM, Jason Ekstrand > wrote: > > Previously, if you remved a CF node that still had instructions in it, > none > > of the use/def information from those instructions would get cleaned up. > > Also, we weren't removing if statements from the if_uses of the > > corresponding register or SSA def. This commit fixes both of these > > problems > > --- > > src/glsl/nir/nir.c | 54 > ++ > > 1 file changed, 54 insertions(+) > > > > diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c > > index 9a88bd3..8ea7bb5 100644 > > --- a/src/glsl/nir/nir.c > > +++ b/src/glsl/nir/nir.c > > @@ -1148,6 +1148,58 @@ stitch_blocks(nir_block *before, nir_block *after) > > exec_node_remove(&after->cf_node.node); > > } > > > > +static void > > +remove_defs_uses(nir_instr *instr); > > + > > +static void > > +cleanup_cf_node(nir_cf_node *node) > > +{ > > + switch (node->type) { > > + case nir_cf_node_block: { > > + nir_block *block = nir_cf_node_as_block(node); > > + /* We need to walk the instructions and clean up defs/uses */ > > + nir_foreach_instr(block, instr) > > + remove_defs_uses(instr); > > + break; > > + } > > + > > + case nir_cf_node_if: { > > + nir_if *if_stmt = nir_cf_node_as_if(node); > > + foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list) > > + cleanup_cf_node(child); > > + foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list) > > + cleanup_cf_node(child); > > + > > + struct set *if_uses; > > + if (if_stmt->condition.is_ssa) { > > + if_uses = if_stmt->condition.ssa->if_uses; > > + } else { > > + if_uses = if_stmt->condition.reg.reg->if_uses; > > + } > > + > > + struct set_entry *entry = _mesa_set_search(if_uses, if_stmt); > > + assert(entry); > > + _mesa_set_remove(if_uses, entry); > > + break; > > + } > > + > > + case nir_cf_node_loop: { > > + nir_loop *loop = nir_cf_node_as_loop(node); > > + foreach_list_typed(nir_cf_node, child, node, &loop->body) > > + cleanup_cf_node(child); > > + break; > > + } > > + case nir_cf_node_function: { > > + nir_function_impl *impl = nir_cf_node_as_function(node); > > + foreach_list_typed(nir_cf_node, child, node, &impl->body) > > + cleanup_cf_node(child); > > + break; > > + } > > + default: > > + unreachable("Invalid CF node type"); > > + } > > +} > > + > > void > > nir_cf_node_remove(nir_cf_node *node) > > { > > @@ -1175,6 +1227,8 @@ nir_cf_node_remove(nir_cf_node *node) > >exec_node_remove(&node->node); > >stitch_blocks(before_block, after_block); > > } > > + > > + cleanup_cf_node(node); > > } > > > > static bool > > -- > > 2.2.2 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 03/12] nir/from_ssa: Use the nir_block_dominance function instead of our own
Reviewed-by: Connor Abbott --- src/glsl/nir/nir_from_ssa.c | 9 ++--- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c index 2e7add3..3625237 100644 --- a/src/glsl/nir/nir_from_ssa.c +++ b/src/glsl/nir/nir_from_ssa.c @@ -54,13 +54,8 @@ ssa_def_dominates(nir_ssa_def *a, nir_ssa_def *b) } else if (a->parent_instr->block == b->parent_instr->block) { return a->live_index <= b->live_index; } else { - nir_block *block = b->parent_instr->block; - while (block->imm_dom != NULL) { - if (block->imm_dom == a->parent_instr->block) -return true; - block = block->imm_dom; - } - return false; + return nir_block_dominates(a->parent_instr->block, + b->parent_instr->block); } } -- 2.2.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 09/12] nir: Make nir_[cf_node/instr]_[prev/next] return null if at the end
--- src/glsl/nir/nir.h | 28 ++-- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 5d84343..f4056f4 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -416,15 +416,23 @@ typedef struct { } nir_instr; static inline nir_instr * -nir_instr_next(const nir_instr *instr) +nir_instr_next(nir_instr *instr) { - return exec_node_data(nir_instr, (instr)->node.next, node); + struct exec_node *next = exec_node_get_next(&instr->node); + if (exec_node_is_tail_sentinel(next)) + return NULL; + else + return exec_node_data(nir_instr, next, node); } static inline nir_instr * -nir_instr_prev(const nir_instr *instr) +nir_instr_prev(nir_instr *instr) { - return exec_node_data(nir_instr, (instr)->node.prev, node); + struct exec_node *prev = exec_node_get_prev(&instr->node); + if (exec_node_is_head_sentinel(prev)) + return NULL; + else + return exec_node_data(nir_instr, prev, node); } typedef struct { @@ -1272,13 +1280,21 @@ typedef struct { static inline nir_cf_node * nir_cf_node_next(nir_cf_node *node) { - return exec_node_data(nir_cf_node, exec_node_get_next(&node->node), node); + struct exec_node *next = exec_node_get_next(&node->node); + if (exec_node_is_tail_sentinel(next)) + return NULL; + else + return exec_node_data(nir_cf_node, next, node); } static inline nir_cf_node * nir_cf_node_prev(nir_cf_node *node) { - return exec_node_data(nir_cf_node, exec_node_get_prev(&node->node), node); + struct exec_node *prev = exec_node_get_prev(&node->node); + if (exec_node_is_head_sentinel(prev)) + return NULL; + else + return exec_node_data(nir_cf_node, prev, node); } static inline bool -- 2.2.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 07/12] nir/validate: Validate SSA defs the same way we do for registers
Reviewed-by: Connor Abbott --- src/glsl/nir/nir_validate.c | 87 - 1 file changed, 79 insertions(+), 8 deletions(-) diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c index 13010ae..a3fe9d6 100644 --- a/src/glsl/nir/nir_validate.c +++ b/src/glsl/nir/nir_validate.c @@ -51,6 +51,15 @@ typedef struct { } reg_validate_state; typedef struct { + /* +* equivalent to the uses in nir_ssa_def, but built up by the validator. +* At the end, we verify that the sets have the same entries. +*/ + struct set *uses, *if_uses; + nir_function_impl *where_defined; +} ssa_def_validate_state; + +typedef struct { /* map of register -> validation state (struct above) */ struct hash_table *regs; @@ -135,19 +144,21 @@ validate_ssa_src(nir_ssa_def *def, validate_state *state) assert(entry); - assert((nir_function_impl *) entry->data == state->impl && - "using an SSA value defined in a different function"); + ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data; - struct set_entry *entry2; + assert(def_state->where_defined == state->impl && + "using an SSA value defined in a different function"); if (state->instr) { - entry2 = _mesa_set_search(def->uses, state->instr); + _mesa_set_add(def_state->uses, state->instr); + + assert(_mesa_set_search(def->uses, state->instr)); } else { assert(state->if_stmt); - entry2 = _mesa_set_search(def->if_uses, state->if_stmt); - } + _mesa_set_add(def_state->if_uses, state->if_stmt); - assert(entry2 && "SSA use missing"); + assert(_mesa_set_search(def->if_uses, state->if_stmt)); + } /* TODO validate that the use is dominated by the definition */ } @@ -226,7 +237,15 @@ validate_ssa_def(nir_ssa_def *def, validate_state *state) BITSET_SET(state->ssa_defs_found, def->index); assert(def->num_components <= 4); - _mesa_hash_table_insert(state->ssa_defs, def, state->impl); + + ssa_def_validate_state *def_state = ralloc(state->ssa_defs, + ssa_def_validate_state); + def_state->where_defined = state->impl; + def_state->uses = _mesa_set_create(def_state, _mesa_hash_pointer, + _mesa_key_pointer_equal); + def_state->if_uses = _mesa_set_create(def_state, _mesa_hash_pointer, + _mesa_key_pointer_equal); + _mesa_hash_table_insert(state->ssa_defs, def, def_state); } static void @@ -759,6 +778,56 @@ validate_var_decl(nir_variable *var, bool is_global, validate_state *state) } } +static bool +postvalidate_ssa_def(nir_ssa_def *def, void *void_state) +{ + validate_state *state = void_state; + + struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def); + ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data; + + if (def_state->uses->entries != def->uses->entries) { + printf("extra entries in SSA def uses:\n"); + struct set_entry *entry; + set_foreach(def->uses, entry) { + struct set_entry *entry2 = +_mesa_set_search(def_state->uses, entry->key); + + if (entry2 == NULL) { +printf("%p\n", entry->key); + } + } + + abort(); + } + + if (def_state->if_uses->entries != def->if_uses->entries) { + printf("extra entries in SSA def uses:\n"); + struct set_entry *entry; + set_foreach(def->if_uses, entry) { + struct set_entry *entry2 = +_mesa_set_search(def_state->if_uses, entry->key); + + if (entry2 == NULL) { +printf("%p\n", entry->key); + } + } + + abort(); + } + + return true; +} + +static bool +postvalidate_ssa_defs_block(nir_block *block, void *state) +{ + nir_foreach_instr(block, instr) + nir_foreach_ssa_def(instr, postvalidate_ssa_def, state); + + return true; +} + static void validate_function_impl(nir_function_impl *impl, validate_state *state) { @@ -809,6 +878,8 @@ validate_function_impl(nir_function_impl *impl, validate_state *state) foreach_list_typed(nir_register, reg, node, &impl->registers) { postvalidate_reg_decl(reg, state); } + + nir_foreach_block(impl, postvalidate_ssa_defs_block, state); } static void -- 2.2.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 10/12] nir: Add an instruction index
This index, unlike the others, has to be manually updated by passes that want to use it. Reviewed-by: Connor Abbott --- src/glsl/nir/nir.c | 20 src/glsl/nir/nir.h | 4 2 files changed, 24 insertions(+) diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 8ea7bb5..6758a72 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -352,6 +352,7 @@ instr_init(nir_instr *instr, nir_instr_type type) { instr->type = type; instr->block = NULL; + instr->index = UINT_MAX; exec_node_init(&instr->node); } @@ -2081,3 +2082,22 @@ nir_index_ssa_defs(nir_function_impl *impl) nir_foreach_block(impl, index_ssa_block, &index); impl->ssa_alloc = index; } + +static bool +index_instrs_block(nir_block *block, void *state) +{ + unsigned *index = (unsigned *)state; + + nir_foreach_instr(block, instr) + instr->index = (*index)++; + + return true; +} + +unsigned +nir_index_instrs(nir_function_impl *impl) +{ + unsigned index = 0; + nir_foreach_block(impl, index_instrs_block, &index); + return index; +} diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index f4056f4..90a7001 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -411,6 +411,9 @@ typedef struct { nir_instr_type type; struct nir_block *block; + /** generic instruction index. */ + unsigned index; + /* flag for dead code elimination (see nir_opt_dce.c) */ bool live; } nir_instr; @@ -1509,6 +1512,7 @@ nir_if *nir_block_get_following_if(nir_block *block); void nir_index_local_regs(nir_function_impl *impl); void nir_index_global_regs(nir_shader *shader); void nir_index_ssa_defs(nir_function_impl *impl); +unsigned nir_index_instrs(nir_function_impl *impl); void nir_index_blocks(nir_function_impl *impl); -- 2.2.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 11/12] nir: Add a global code motion (GCM) pass
v2 Jason Ekstrand : - Use nir_dominance_lca for computing least common anscestors - Use the block index for comparing dominance tree depths - Pin things that do partial derivatives --- src/glsl/Makefile.sources | 1 + src/glsl/nir/nir.h | 2 + src/glsl/nir/nir_opt_gcm.c | 501 + 3 files changed, 504 insertions(+) create mode 100644 src/glsl/nir/nir_opt_gcm.c diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index a580b6e..69cb2e6 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -43,6 +43,7 @@ NIR_FILES = \ nir/nir_opt_copy_propagate.c \ nir/nir_opt_cse.c \ nir/nir_opt_dce.c \ + nir/nir_opt_gcm.c \ nir/nir_opt_global_to_local.c \ nir/nir_opt_peephole_select.c \ nir/nir_opt_remove_phis.c \ diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 90a7001..55fb43d 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1589,6 +1589,8 @@ bool nir_opt_cse(nir_shader *shader); bool nir_opt_dce_impl(nir_function_impl *impl); bool nir_opt_dce(nir_shader *shader); +void nir_opt_gcm(nir_shader *shader); + bool nir_opt_peephole_select(nir_shader *shader); bool nir_opt_peephole_ffma(nir_shader *shader); diff --git a/src/glsl/nir/nir_opt_gcm.c b/src/glsl/nir/nir_opt_gcm.c new file mode 100644 index 000..d48518b --- /dev/null +++ b/src/glsl/nir/nir_opt_gcm.c @@ -0,0 +1,501 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Jason Ekstrand (ja...@jlekstrand.net) + * + */ + +#include "nir.h" + +/* + * Implements Global Code Motion. A description of GCM can be found in + * "Global Code Motion; Global Value Numbering" by Cliff Click. + * Unfortunately, the algorithm presented in the paper is broken in a + * number of ways. The algorithm used here differs substantially from the + * one in the paper but it is, in my opinion, much easier to read and + * verify correcness. + */ + +struct gcm_block_info { + /* Number of loops this block is inside */ + unsigned loop_depth; + + /* The last instruction inserted into this block. This is used as we +* traverse the instructions and insert them back into the program to +* put them in the right order. +*/ + nir_instr *last_instr; +}; + +struct gcm_state { + nir_function_impl *impl; + nir_instr *instr; + + /* Marks all instructions that have been visited by the curren pass */ + BITSET_WORD *visited; + + /* Marks instructions that are "pinned", i.e. cannot be moved from their +* basic block by code motion */ + BITSET_WORD *pinned; + + /* The list of non-pinned instructions. As we do the late scheduling, +* we pull non-pinned instructions out of their blocks and place them in +* this list. This saves us from having linked-list problems when we go +* to put instructions back in their blocks. +*/ + struct exec_list instrs; + + struct gcm_block_info *blocks; +}; + +/* Recursively walks the CFG and builds the block_info structure */ +static void +gcm_build_block_info(struct exec_list *cf_list, struct gcm_state *state, + unsigned loop_depth) +{ + foreach_list_typed(nir_cf_node, node, node, cf_list) { + switch (node->type) { + case nir_cf_node_block: { + nir_block *block = nir_cf_node_as_block(node); + state->blocks[block->index].loop_depth = loop_depth; + break; + } + case nir_cf_node_if: { + nir_if *if_stmt = nir_cf_node_as_if(node); + gcm_build_block_info(&if_stmt->then_list, state, loop_depth); + gcm_build_block_info(&if_stmt->else_list, state, loop_depth); + break; + } + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(node); + gcm_build_block_info(&loop->body, state, loop_depth + 1); +
[Mesa-dev] [PATCH v2 06/12] nir/validate: Validate if_uses on registers
Reviewed-by: Connor Abbott --- src/glsl/nir/nir_validate.c | 62 - 1 file changed, 44 insertions(+), 18 deletions(-) diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c index a34a017..13010ae 100644 --- a/src/glsl/nir/nir_validate.c +++ b/src/glsl/nir/nir_validate.c @@ -46,7 +46,7 @@ typedef struct { * equivalent to the uses and defs in nir_register, but built up by the * validator. At the end, we verify that the sets have the same entries. */ - struct set *uses, *defs; + struct set *uses, *if_uses, *defs; nir_function_impl *where_defined; /* NULL for global registers */ } reg_validate_state; @@ -92,16 +92,22 @@ validate_reg_src(nir_reg_src *src, validate_state *state) { assert(src->reg != NULL); - struct set_entry *entry = _mesa_set_search(src->reg->uses, state->instr); - assert(entry && "use not in nir_register.uses"); + struct hash_entry *entry; + entry = _mesa_hash_table_search(state->regs, src->reg); + assert(entry); - struct hash_entry *entry2; - entry2 = _mesa_hash_table_search(state->regs, src->reg); + reg_validate_state *reg_state = (reg_validate_state *) entry->data; - assert(entry2); + if (state->instr) { + _mesa_set_add(reg_state->uses, state->instr); - reg_validate_state *reg_state = (reg_validate_state *) entry2->data; - _mesa_set_add(reg_state->uses, state->instr); + assert(_mesa_set_search(src->reg->uses, state->instr)); + } else { + assert(state->if_stmt); + _mesa_set_add(reg_state->if_uses, state->if_stmt); + + assert(_mesa_set_search(src->reg->if_uses, state->if_stmt)); + } if (!src->reg->is_global) { assert(reg_state->where_defined == state->impl && @@ -134,7 +140,12 @@ validate_ssa_src(nir_ssa_def *def, validate_state *state) struct set_entry *entry2; - entry2 = _mesa_set_search(def->uses, state->instr); + if (state->instr) { + entry2 = _mesa_set_search(def->uses, state->instr); + } else { + assert(state->if_stmt); + entry2 = _mesa_set_search(def->if_uses, state->if_stmt); + } assert(entry2 && "SSA use missing"); @@ -484,6 +495,8 @@ validate_instr(nir_instr *instr, validate_state *state) assert(!"Invalid ALU instruction type"); break; } + + state->instr = NULL; } static void @@ -501,6 +514,7 @@ validate_phi_src(nir_phi_instr *instr, nir_block *pred, validate_state *state) instr->dest.ssa.num_components); validate_src(&src->src, state); + state->instr = NULL; return; } } @@ -562,6 +576,8 @@ validate_block(nir_block *block, validate_state *state) static void validate_if(nir_if *if_stmt, validate_state *state) { + state->if_stmt = if_stmt; + assert(!exec_node_is_head_sentinel(if_stmt->cf_node.node.prev)); nir_cf_node *prev_node = nir_cf_node_prev(&if_stmt->cf_node); assert(prev_node->type == nir_cf_node_block); @@ -576,15 +592,7 @@ validate_if(nir_if *if_stmt, validate_state *state) nir_cf_node *next_node = nir_cf_node_next(&if_stmt->cf_node); assert(next_node->type == nir_cf_node_block); - if (!if_stmt->condition.is_ssa) { - nir_register *reg = if_stmt->condition.reg.reg; - struct set_entry *entry = _mesa_set_search(reg->if_uses, if_stmt); - assert(entry); - } else { - nir_ssa_def *def = if_stmt->condition.ssa; - struct set_entry *entry = _mesa_set_search(def->if_uses, if_stmt); - assert(entry); - } + validate_src(&if_stmt->condition, state); assert(!exec_list_is_empty(&if_stmt->then_list)); assert(!exec_list_is_empty(&if_stmt->else_list)); @@ -603,6 +611,7 @@ validate_if(nir_if *if_stmt, validate_state *state) } state->parent_node = old_parent; + state->if_stmt = NULL; } static void @@ -672,6 +681,8 @@ prevalidate_reg_decl(nir_register *reg, bool is_global, validate_state *state) reg_validate_state *reg_state = ralloc(state->regs, reg_validate_state); reg_state->uses = _mesa_set_create(reg_state, _mesa_hash_pointer, _mesa_key_pointer_equal); + reg_state->if_uses = _mesa_set_create(reg_state, _mesa_hash_pointer, + _mesa_key_pointer_equal); reg_state->defs = _mesa_set_create(reg_state, _mesa_hash_pointer, _mesa_key_pointer_equal); @@ -702,6 +713,21 @@ postvalidate_reg_decl(nir_register *reg, validate_state *state) abort(); } + if (reg_state->if_uses->entries != reg->if_uses->entries) { + printf("extra entries in register if_uses:\n"); + struct set_entry *entry; + set_foreach(reg->if_uses, entry) { + struct set_entry *entry2 = +_mesa_set_search(reg_state->if_uses, entry->key); + + if (entry2 == NULL) { +printf("%p\n", entry->key); + } + } + + abort(); + } + if (reg_state->defs->entries
[Mesa-dev] [PATCH v2 08/12] nir/from_ssa: Don't try to read an invalid instruction
--- src/glsl/nir/nir_from_ssa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c index 3625237..7c50095 100644 --- a/src/glsl/nir/nir_from_ssa.c +++ b/src/glsl/nir/nir_from_ssa.c @@ -272,7 +272,7 @@ get_parallel_copy_at_end_of_block(nir_block *block) if (last_instr->type == nir_instr_type_jump) last_instr = nir_instr_prev(last_instr); - if (last_instr->type == nir_instr_type_parallel_copy) + if (last_instr && last_instr->type == nir_instr_type_parallel_copy) return nir_instr_as_parallel_copy(last_instr); else return NULL; -- 2.2.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 05/12] nir: Properly clean up CF nodes when we remove them
Previously, if you remved a CF node that still had instructions in it, none of the use/def information from those instructions would get cleaned up. Also, we weren't removing if statements from the if_uses of the corresponding register or SSA def. This commit fixes both of these problems --- src/glsl/nir/nir.c | 54 ++ 1 file changed, 54 insertions(+) diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 9a88bd3..8ea7bb5 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -1148,6 +1148,58 @@ stitch_blocks(nir_block *before, nir_block *after) exec_node_remove(&after->cf_node.node); } +static void +remove_defs_uses(nir_instr *instr); + +static void +cleanup_cf_node(nir_cf_node *node) +{ + switch (node->type) { + case nir_cf_node_block: { + nir_block *block = nir_cf_node_as_block(node); + /* We need to walk the instructions and clean up defs/uses */ + nir_foreach_instr(block, instr) + remove_defs_uses(instr); + break; + } + + case nir_cf_node_if: { + nir_if *if_stmt = nir_cf_node_as_if(node); + foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list) + cleanup_cf_node(child); + foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list) + cleanup_cf_node(child); + + struct set *if_uses; + if (if_stmt->condition.is_ssa) { + if_uses = if_stmt->condition.ssa->if_uses; + } else { + if_uses = if_stmt->condition.reg.reg->if_uses; + } + + struct set_entry *entry = _mesa_set_search(if_uses, if_stmt); + assert(entry); + _mesa_set_remove(if_uses, entry); + break; + } + + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(node); + foreach_list_typed(nir_cf_node, child, node, &loop->body) + cleanup_cf_node(child); + break; + } + case nir_cf_node_function: { + nir_function_impl *impl = nir_cf_node_as_function(node); + foreach_list_typed(nir_cf_node, child, node, &impl->body) + cleanup_cf_node(child); + break; + } + default: + unreachable("Invalid CF node type"); + } +} + void nir_cf_node_remove(nir_cf_node *node) { @@ -1175,6 +1227,8 @@ nir_cf_node_remove(nir_cf_node *node) exec_node_remove(&node->node); stitch_blocks(before_block, after_block); } + + cleanup_cf_node(node); } static bool -- 2.2.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 12/12] i965/nir: Use Global Code Motion
v2: Do GCM after dead code elimination --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index da81b61..a0e6b1d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -44,6 +44,8 @@ nir_optimize(nir_shader *nir) nir_validate_shader(nir); progress |= nir_opt_dce(nir); nir_validate_shader(nir); + nir_opt_gcm(nir); + nir_validate_shader(nir); progress |= nir_opt_cse(nir); nir_validate_shader(nir); progress |= nir_opt_peephole_select(nir); -- 2.2.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 04/12] nir: use nir_foreach_ssa_def for indexing ssa defs
This is both simpler and more correct. The old code didn't properly index load_const instructions. Reviewed-by: Connor Abbott --- src/glsl/nir/nir.c | 26 -- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 10e6ed3..9a88bd3 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -2003,37 +2003,19 @@ nir_index_blocks(nir_function_impl *impl) } static void -index_ssa_def(nir_ssa_def *def, unsigned *index) +index_ssa_def_cb(nir_ssa_def *def, void *state) { + unsigned *index = (unsigned *) state; def->index = (*index)++; -} -static bool -index_ssa_def_cb(nir_dest *dest, void *state) -{ - unsigned *index = (unsigned *) state; - if (dest->is_ssa) - index_ssa_def(&dest->ssa, index); return true; } -static void -index_ssa_undef(nir_ssa_undef_instr *instr, unsigned *index) -{ - index_ssa_def(&instr->def, index); -} - static bool index_ssa_block(nir_block *block, void *state) { - unsigned *index = (unsigned *) state; - - nir_foreach_instr(block, instr) { - if (instr->type == nir_instr_type_ssa_undef) - index_ssa_undef(nir_instr_as_ssa_undef(instr), index); - else - nir_foreach_dest(instr, index_ssa_def_cb, state); - } + nir_foreach_instr(block, instr) + nir_foreach_ssa_def(instr, index_ssa_def_cb, state); return true; } -- 2.2.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 01/12] nir/dominance: Expose the dominance intersection function
Being able to find the least common anscestor in the dominance tree is a useful thing that we may want to do in other passes. In particular, we need it for GCM. v2: Handle NULL inputs by returning the other block --- src/glsl/nir/nir.h | 2 ++ src/glsl/nir/nir_dominance.c | 22 ++ 2 files changed, 24 insertions(+) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 4cb2e92..886dcd2 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1500,6 +1500,8 @@ static inline void nir_validate_shader(nir_shader *shader) { } void nir_calc_dominance_impl(nir_function_impl *impl); void nir_calc_dominance(nir_shader *shader); +nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2); + void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp); void nir_dump_dom_tree(nir_shader *shader, FILE *fp); diff --git a/src/glsl/nir/nir_dominance.c b/src/glsl/nir/nir_dominance.c index 67fdcc6..831b9a3 100644 --- a/src/glsl/nir/nir_dominance.c +++ b/src/glsl/nir/nir_dominance.c @@ -212,6 +212,28 @@ nir_calc_dominance(nir_shader *shader) } } +/** + * Computes the least common anscestor of two blocks. If one of the blocks + * is null, the other block is returned. + */ +nir_block * +nir_dominance_lca(nir_block *b1, nir_block *b2) +{ + if (b1 == NULL) + return b2; + + if (b2 == NULL) + return b1; + + assert(nir_cf_node_get_function(&b1->cf_node) == + nir_cf_node_get_function(&b2->cf_node)); + + assert(nir_cf_node_get_function(&b1->cf_node)->valid_metadata & + nir_metadata_dominance); + + return intersect(b1, b2); +} + static bool dump_block_dom(nir_block *block, void *state) { -- 2.2.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v2 02/12] nir/dominance: Add a constant-time mechanism for comparing blocks
This is mostly thanks to Connor. The idea is to do a depth-first search that computes pre and post indices for all the blocks. We can then figure out if one block dominates another in constant time by two simple comparison operations. Reviewed-by: Connor Abbott --- src/glsl/nir/nir.h | 9 + src/glsl/nir/nir_dominance.c | 30 ++ 2 files changed, 39 insertions(+) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 886dcd2..5d84343 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1135,6 +1135,14 @@ typedef struct nir_block { /* Set of nir_block's on the dominance frontier of this block */ struct set *dom_frontier; + /* +* These two indices have the property that dom_{pre,post}_index for each +* child of this block in the dominance tree will always be between +* dom_pre_index and dom_post_index for this block, which makes testing if +* a given block is dominated by another block an O(1) operation. +*/ + unsigned dom_pre_index, dom_post_index; + /* live in and out for this block; used for liveness analysis */ BITSET_WORD *live_in; BITSET_WORD *live_out; @@ -1501,6 +1509,7 @@ void nir_calc_dominance_impl(nir_function_impl *impl); void nir_calc_dominance(nir_shader *shader); nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2); +bool nir_block_dominates(nir_block *parent, nir_block *child); void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp); void nir_dump_dom_tree(nir_shader *shader, FILE *fp); diff --git a/src/glsl/nir/nir_dominance.c b/src/glsl/nir/nir_dominance.c index 831b9a3..d69e4f4 100644 --- a/src/glsl/nir/nir_dominance.c +++ b/src/glsl/nir/nir_dominance.c @@ -177,6 +177,17 @@ calc_dom_children(nir_function_impl* impl) nir_foreach_block(impl, block_add_child, NULL); } +static void +calc_dfs_indicies(nir_block *block, unsigned *index) +{ + block->dom_pre_index = (*index)++; + + for (unsigned i = 0; i < block->num_dom_children; i++) + calc_dfs_indicies(block->dom_children[i], index); + + block->dom_post_index = (*index)++; +} + void nir_calc_dominance_impl(nir_function_impl *impl) { @@ -201,6 +212,9 @@ nir_calc_dominance_impl(nir_function_impl *impl) impl->start_block->imm_dom = NULL; calc_dom_children(impl); + + unsigned dfs_index = 0; + calc_dfs_indicies(impl->start_block, &dfs_index); } void @@ -234,6 +248,22 @@ nir_dominance_lca(nir_block *b1, nir_block *b2) return intersect(b1, b2); } +/** + * Returns true if parent dominates child + */ +bool +nir_block_dominates(nir_block *parent, nir_block *child) +{ + assert(nir_cf_node_get_function(&parent->cf_node) == + nir_cf_node_get_function(&child->cf_node)); + + assert(nir_cf_node_get_function(&parent->cf_node)->valid_metadata & + nir_metadata_dominance); + + return child->dom_pre_index >= parent->dom_pre_index && + child->dom_post_index <= parent->dom_post_index; +} + static bool dump_block_dom(nir_block *block, void *state) { -- 2.2.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 84570] Borderlands 2/Pre-Sequel: Constant frame rate drops while playing; really bad with additionl lighting
https://bugs.freedesktop.org/show_bug.cgi?id=84570 --- Comment #32 from Kai --- (In reply to Michel Dänzer from comment #31) > (In reply to Kai from comment #30) > > Michel, is there any chance attachment 107544 will be > > part of 3.18? > > No, but it's in Alex's queue for 3.19. 3.19 has been released. As I'm not sure, what the situation with Intel GPUs is, it's not clear to me, whether this bug should be closed, renamed or duplicated (with this one being closed afterwards for Radeon GPUs and the other renamed to Intel: …). But for me the game ran pretty well over the past weeks with the various RC versions of 3.19. Michel/Tapani, what do you think? -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 84570] Borderlands 2/Pre-Sequel: Constant frame rate drops while playing; really bad with additionl lighting
https://bugs.freedesktop.org/show_bug.cgi?id=84570 --- Comment #33 from Kenneth Graunke --- Aspyr's latest release contains bugfixes related to this, and that's solved the problem on Intel. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Expected wide line rendering with clipping
On Fri, 2015-02-06 at 21:27 +0100, Roland Scheidegger wrote: > Am 06.02.2015 um 13:11 schrieb Iago Toral: > > Hi, > > > > Eduardo and I have been looking into a few dEQP test failures that deal > > with wide line rendering. There are a few of them that fail because of > > how clipping is implemented for this case. > > > > The problem in these cases seems to be that the hw renders the wide line > > as a parallelogram so that if an edge of the parallelogram is partially > > clipped, the other parallel edge will also be clipped at the same > > height/width so that the resulting wide line stays a parallelogram. The > > dEQP renders as if a wide line where a collection of 1px lines, so > > cliping one edge of the resulting line does not have implications for > > the other edge. > > > > This ASCII art illustrates the problem (* represent line pixels, | > > represents the viewport's rightmost edge): > > > > Expected by dEQP i965 rendering > > | | > > *| | > >**| | > > ***| | > > | | > > | | > > | | > > > > We can make the rendering result match dEQP's expectation by enabling > > the GuardBand in the clip stage (GEN6_CLIP_GB_TEST). This is being > > disabled by the driver for gen7 in this scenario because the underlying > > framebuffer surface is larger than the viewport, and per the comment in > > the code, in gen7 that would make it render beyond the viewport limits > > in that surface, while it notes that gen8 hw fixes this. So I guess that > > we do not want to do this in any case in gen7. > > > > Then, I have been reviewing the OpenGL specs to see if they clarify what > > should happen when clipping wide lines and I think the spec does not > > make a clear point, since when it talks about line clipping it does not > > cover the case of wide lines specifically: > > > > "13.5. PRIMITIVE CLIPPING > > ... > > If part of the line segment lies in the volume and part lies outside, > > then the line segment is clipped and new vertex coordinates are computed > > for one or both vertices. A clipped line segment endpoint lies on both > > the original line segment and the boundary of the clip volume. > > ... > > " > > > > The above description is clear for 1px lines, but where should the new > > vertex be generated exactly for wide lines, where many points could be > > at the boundary of the clip volume? Is any point valid? (that would mean > > the dEQP test is bogus because there are multiple accepted renderings), > > should the new vertex be exactly at the center of the line? (that would > > make both deqp and intel rendering bogus). > > > > Then there is also this comment in "14.5.2.2 Wide Lines" inside the > > non-AA line rasterization chapter: > > > > "Non-antialiased line segments of width other than one are rasterized by > > off-setting them in the minor direction (for an x-major line, the minor > > direction is y, and for a y-major line, the minor direction is x) and > > replicating fragments in the minor direction (see figure 14.3). Let w be > > the width rounded to the nearest integer (if w = 0, then it is as if w = > > 1). If the line segment has endpoints given by (x 0 , y 0 ) and (x 1 , y > > 1 ) in window coordinates, the segment with endpoints (x 0 , y 0 − (w − > > 1)/2) and (x 1 , y 1 − (w − 1)/2) is rasterized, but instead of a single > > fragment, a column of fragments of height w (a row of fragments of > > length w for a y-major segment) is produced at each x (y for y-major) > > location. The lowest fragment of this column is the fragment that would > > be produced by rasterizing the segment of width 1 with the modified > > coordinates." > > > > This "suggests" that wide line rendering should be equivalent to > > rendering multiple lines of width=1 and if we actually did that then the > > expected rendering result after clipping would be what dEQP expects. > > > > I think the whole thing isn't 100% clear. Does anyone have any thoughts > > on what should be considered correct behavior in this case? > > > > Iago > > > > > > I am quite sure that i965 rendering is what (at least legacy GL) > expects. This is because the wide-line expansion is defined to be > happening at rasterization time, i.e. past clipping. Thus, at clipping > time, no matter if a line is wide or not, it is still really a line, > hence clipping will produce a single new endpoint at the viewport edge > and in rasterization this is then what will be used as the endpoint. That makes sense, but in that case I think i965 rendering would still be incorrect, the expected result would look something like this: | | **| ***| | | | However, as far as I can tell, since i965 hardware renders wide lines as parallelograms, it can't render something like this
[Mesa-dev] [PATCH v4] glsl: Linking support for doubles
From: Dave Airlie Signed-off-by: Dave Airlie Reviewed-by: Ian Romanick --- Removed slot multiplication in link_varyings, since component_slots now takes care of it. src/glsl/link_uniform_initializers.cpp | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/glsl/link_uniform_initializers.cpp b/src/glsl/link_uniform_initializers.cpp index f6a60bc..6907384 100644 --- a/src/glsl/link_uniform_initializers.cpp +++ b/src/glsl/link_uniform_initializers.cpp @@ -75,6 +75,11 @@ copy_constant_to_storage(union gl_constant_value *storage, case GLSL_TYPE_FLOAT: storage[i].f = val->value.f[i]; break; + case GLSL_TYPE_DOUBLE: + /* XXX need to check on big-endian */ + storage[i * 2].u = *(uint32_t *)&val->value.d[i]; + storage[i * 2 + 1].u = *(((uint32_t *)&val->value.d[i]) + 1); + break; case GLSL_TYPE_BOOL: storage[i].b = val->value.b[i] ? boolean_true : 0; break; @@ -200,6 +205,7 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog, val->array_elements[0]->type->base_type; const unsigned int elements = val->array_elements[0]->type->components(); unsigned int idx = 0; + unsigned dmul = (base_type == GLSL_TYPE_DOUBLE) ? 2 : 1; assert(val->type->length >= storage->array_elements); for (unsigned int i = 0; i < storage->array_elements; i++) { @@ -209,7 +215,7 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog, elements, boolean_true); -idx += elements; +idx += elements * dmul; } } else { copy_constant_to_storage(storage->storage, -- 2.0.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v4] glsl: Uniform linking support for doubles
From: Dave Airlie Signed-off-by: Dave Airlie --- Removed now-unnecessary component_slots multiplication, check the base type without array. src/glsl/link_uniforms.cpp | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp index de2f6c9..88d9b74 100644 --- a/src/glsl/link_uniforms.cpp +++ b/src/glsl/link_uniforms.cpp @@ -608,7 +608,11 @@ private: } if (type->without_array()->is_matrix()) { - this->uniforms[id].matrix_stride = 16; +const unsigned N = type->without_array()->base_type == GLSL_TYPE_DOUBLE ? 8 : 4; +const unsigned items = row_major ? type->matrix_columns : type->vector_elements; + +assert(items <= 4); +this->uniforms[id].matrix_stride = glsl_align(items * N, 16); this->uniforms[id].row_major = row_major; } else { this->uniforms[id].matrix_stride = 0; -- 2.0.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH v4] glsl: Add double builtin type generation
From: Dave Airlie Signed-off-by: Dave Airlie Reviewed-by: Matt Turner Reviewed-by: Ian Romanick --- Made component_slots() return 2 instead of 1 for double. Also added to list for uniform_locations(), returning 1. src/glsl/builtin_type_macros.h | 16 ++ src/glsl/builtin_types.cpp | 30 +++ src/glsl/glsl_parser_extras.h | 5 ++ src/glsl/glsl_types.cpp| 112 + src/glsl/glsl_types.h | 19 ++- 5 files changed, 159 insertions(+), 23 deletions(-) diff --git a/src/glsl/builtin_type_macros.h b/src/glsl/builtin_type_macros.h index 236e1ce..bf74eb3 100644 --- a/src/glsl/builtin_type_macros.h +++ b/src/glsl/builtin_type_macros.h @@ -64,6 +64,22 @@ DECL_TYPE(mat3x4, GL_FLOAT_MAT3x4, GLSL_TYPE_FLOAT, 4, 3) DECL_TYPE(mat4x2, GL_FLOAT_MAT4x2, GLSL_TYPE_FLOAT, 2, 4) DECL_TYPE(mat4x3, GL_FLOAT_MAT4x3, GLSL_TYPE_FLOAT, 3, 4) +DECL_TYPE(double, GL_DOUBLE,GLSL_TYPE_DOUBLE, 1, 1) +DECL_TYPE(dvec2, GL_DOUBLE_VEC2, GLSL_TYPE_DOUBLE, 2, 1) +DECL_TYPE(dvec3, GL_DOUBLE_VEC3, GLSL_TYPE_DOUBLE, 3, 1) +DECL_TYPE(dvec4, GL_DOUBLE_VEC4, GLSL_TYPE_DOUBLE, 4, 1) + +DECL_TYPE(dmat2, GL_DOUBLE_MAT2, GLSL_TYPE_DOUBLE, 2, 2) +DECL_TYPE(dmat3, GL_DOUBLE_MAT3, GLSL_TYPE_DOUBLE, 3, 3) +DECL_TYPE(dmat4, GL_DOUBLE_MAT4, GLSL_TYPE_DOUBLE, 4, 4) + +DECL_TYPE(dmat2x3, GL_DOUBLE_MAT2x3, GLSL_TYPE_DOUBLE, 3, 2) +DECL_TYPE(dmat2x4, GL_DOUBLE_MAT2x4, GLSL_TYPE_DOUBLE, 4, 2) +DECL_TYPE(dmat3x2, GL_DOUBLE_MAT3x2, GLSL_TYPE_DOUBLE, 2, 3) +DECL_TYPE(dmat3x4, GL_DOUBLE_MAT3x4, GLSL_TYPE_DOUBLE, 4, 3) +DECL_TYPE(dmat4x2, GL_DOUBLE_MAT4x2, GLSL_TYPE_DOUBLE, 2, 4) +DECL_TYPE(dmat4x3, GL_DOUBLE_MAT4x3, GLSL_TYPE_DOUBLE, 3, 4) + DECL_TYPE(sampler1D, GL_SAMPLER_1D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D, 0, 0, GLSL_TYPE_FLOAT) DECL_TYPE(sampler2D, GL_SAMPLER_2D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_2D, 0, 0, GLSL_TYPE_FLOAT) DECL_TYPE(sampler3D, GL_SAMPLER_3D, GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_3D, 0, 0, GLSL_TYPE_FLOAT) diff --git a/src/glsl/builtin_types.cpp b/src/glsl/builtin_types.cpp index 10fac0f..fef86df 100644 --- a/src/glsl/builtin_types.cpp +++ b/src/glsl/builtin_types.cpp @@ -159,6 +159,20 @@ const static struct builtin_type_versions { T(mat4x2, 120, 300) T(mat4x3, 120, 300) + T(double, 400, 999) + T(dvec2, 400, 999) + T(dvec3, 400, 999) + T(dvec4, 400, 999) + T(dmat2, 400, 999) + T(dmat3, 400, 999) + T(dmat4, 400, 999) + T(dmat2x3, 400, 999) + T(dmat2x4, 400, 999) + T(dmat3x2, 400, 999) + T(dmat3x4, 400, 999) + T(dmat4x2, 400, 999) + T(dmat4x3, 400, 999) + T(sampler1D, 110, 999) T(sampler2D, 110, 100) T(sampler3D, 110, 300) @@ -361,5 +375,21 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state) if (state->ARB_shader_atomic_counters_enable) { add_type(symbols, glsl_type::atomic_uint_type); } + + if (state->ARB_gpu_shader_fp64_enable) { + add_type(symbols, glsl_type::double_type); + add_type(symbols, glsl_type::dvec2_type); + add_type(symbols, glsl_type::dvec3_type); + add_type(symbols, glsl_type::dvec4_type); + add_type(symbols, glsl_type::dmat2_type); + add_type(symbols, glsl_type::dmat3_type); + add_type(symbols, glsl_type::dmat4_type); + add_type(symbols, glsl_type::dmat2x3_type); + add_type(symbols, glsl_type::dmat2x4_type); + add_type(symbols, glsl_type::dmat3x2_type); + add_type(symbols, glsl_type::dmat3x4_type); + add_type(symbols, glsl_type::dmat4x2_type); + add_type(symbols, glsl_type::dmat4x3_type); + } } /** @} */ diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index dafee4e..ea53270 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -205,6 +205,11 @@ struct _mesa_glsl_parse_state { || EXT_separate_shader_objects_enable; } + bool has_double() const + { + return ARB_gpu_shader_fp64_enable || is_version(400, 0); + } + void process_version_directive(YYLTYPE *locp, int version, const char *ident); diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp index b4223f4..f6c990f 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/glsl_types.cpp @@ -194,6 +194,22 @@ glsl_type::contains_integer() const } bool +glsl_type::contains_double() const +{ + if (this->is_array()) { + return this->fields.array->contains_double(
[Mesa-dev] [PATCH v4] mesa: add double uniform support. (v5)
From: Dave Airlie This adds support for the new uniform interfaces from ARB_gpu_shader_fp64. v2: support ARB_separate_shader_objects ProgramUniform*d* (Ian) don't allow boolean uniforms to be updated (issue 15) (Ian) v3: fix size_mul v4: Teach uniform update to take into account double precision (Topi) v5: add transpose for double case (Ilia) Signed-off-by: Dave Airlie --- Changed ir_to_mesa to properly take into account the fact that a dvec2 takes up a single slot. Also add missed dmul to index on src storage array. src/mesa/main/uniform_query.cpp | 46 +++--- src/mesa/main/uniforms.c| 185 src/mesa/main/uniforms.h| 3 +- src/mesa/program/ir_to_mesa.cpp | 26 +- 4 files changed, 228 insertions(+), 32 deletions(-) diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index d36f506..40327fb 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ -469,6 +469,9 @@ log_uniform(const void *values, enum glsl_base_type basicType, case GLSL_TYPE_FLOAT: printf("%g ", v[i].f); break; + case GLSL_TYPE_DOUBLE: + printf("%g ", *(double* )&v[i * 2].f); + break; default: assert(!"Should not get here."); break; @@ -529,11 +532,12 @@ _mesa_propagate_uniforms_to_driver_storage(struct gl_uniform_storage *uni, */ const unsigned components = MAX2(1, uni->type->vector_elements); const unsigned vectors = MAX2(1, uni->type->matrix_columns); + const int dmul = uni->type->base_type == GLSL_TYPE_DOUBLE ? 2 : 1; /* Store the data in the driver's requested type in the driver's storage * areas. */ - unsigned src_vector_byte_stride = components * 4; + unsigned src_vector_byte_stride = components * 4 * dmul; for (i = 0; i < uni->num_driver_storage; i++) { struct gl_uniform_driver_storage *const store = &uni->driver_storage[i]; @@ -541,7 +545,7 @@ _mesa_propagate_uniforms_to_driver_storage(struct gl_uniform_storage *uni, const unsigned extra_stride = store->element_stride - (vectors * store->vector_stride); const uint8_t *src = -(uint8_t *) (&uni->storage[array_index * (components * vectors)].i); +(uint8_t *) (&uni->storage[array_index * (dmul * components * vectors)].i); #if 0 printf("%s: %p[%d] components=%u vectors=%u count=%u vector_stride=%u " @@ -608,6 +612,7 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg, unsigned src_components) { unsigned offset; + int size_mul = basicType == GLSL_TYPE_DOUBLE ? 2 : 1; struct gl_uniform_storage *const uni = validate_uniform_parameters(ctx, shProg, location, count, @@ -623,7 +628,7 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg, bool match; switch (uni->type->base_type) { case GLSL_TYPE_BOOL: - match = true; + match = (basicType != GLSL_TYPE_DOUBLE); break; case GLSL_TYPE_SAMPLER: case GLSL_TYPE_IMAGE: @@ -710,8 +715,8 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg, /* Store the data in the "actual type" backing storage for the uniform. */ if (!uni->type->is_boolean()) { - memcpy(&uni->storage[components * offset], values, -sizeof(uni->storage[0]) * components * count); + memcpy(&uni->storage[size_mul * components * offset], values, +sizeof(uni->storage[0]) * components * count * size_mul); } else { const union gl_constant_value *src = (const union gl_constant_value *) values; @@ -808,13 +813,14 @@ extern "C" void _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg, GLuint cols, GLuint rows, GLint location, GLsizei count, - GLboolean transpose, const GLfloat *values) + GLboolean transpose, + const GLvoid *values, GLenum type) { unsigned offset; unsigned vectors; unsigned components; unsigned elements; - + int size_mul; struct gl_uniform_storage *const uni = validate_uniform_parameters(ctx, shProg, location, count, &offset, "glUniformMatrix"); @@ -827,6 +833,9 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg, return; } + assert(type == GL_FLOAT || type == GL_DOUBLE); + size_mul = type == GL_DOUBLE ? 2 : 1; + assert(!uni->type->is_sampler()); vectors = uni->type->matrix_columns; components = uni->type->vector_elements; @@ -852,7 +861,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg, } if (unlikely(ctx->_Shader->Flags & GLSL_UNIFORMS)) { - log_uniform(values, GLSL_TYPE_FLOAT, components, vectors, count, + log_uniform(values, uni->type->base_type, components, vectors, count,
[Mesa-dev] [PATCH v4] glsl: lower double optional passes (v2)
From: Dave Airlie These lowering passes are optional for the backend to request, currently the TGSI softpipe backend most likely the r600g backend would want to use these passes as is. They aim to hit the gallium opcodes from the standard rounding/truncation functions. v2: also lower floor in mod_to_floor Signed-off-by: Dave Airlie --- Earlier changes messed things up... and went untested because nvc0 doesn't use this lowering pass. Re-ran on softpipe, works fine again. src/glsl/ir_optimization.h | 1 + src/glsl/lower_instructions.cpp | 175 2 files changed, 176 insertions(+) diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 912d910..9f91e2f 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -41,6 +41,7 @@ #define CARRY_TO_ARITH 0x200 #define BORROW_TO_ARITH0x400 #define SAT_TO_CLAMP 0x800 +#define DOPS_TO_DFRAC 0x1000 /** * \see class lower_packing_builtins_visitor diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index 8994aad..c63c018 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -42,6 +42,7 @@ * - CARRY_TO_ARITH * - BORROW_TO_ARITH * - SAT_TO_CLAMP + * - DOPS_TO_DFRAC * * SUB_TO_ADD_NEG: * --- @@ -112,6 +113,9 @@ * - * Converts ir_unop_saturate into min(max(x, 0.0), 1.0) * + * DOPS_TO_DFRAC: + * -- + * Converts double trunc, ceil, floor, round to fract */ #include "main/core.h" /* for M_LOG2E */ @@ -151,6 +155,11 @@ private: void sat_to_clamp(ir_expression *); void double_dot_to_fma(ir_expression *); void double_lrp(ir_expression *); + void dceil_to_dfrac(ir_expression *); + void dfloor_to_dfrac(ir_expression *); + void dround_even_to_dfrac(ir_expression *); + void dtrunc_to_dfrac(ir_expression *); + void dsign_to_csel(ir_expression *); }; } /* anonymous namespace */ @@ -315,6 +324,9 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir) ir_expression *const floor_expr = new(ir) ir_expression(ir_unop_floor, x->type, div_expr); + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dfloor_to_dfrac(floor_expr); + ir_expression *const mul_expr = new(ir) ir_expression(ir_binop_mul, new(ir) ir_dereference_variable(y), @@ -578,6 +590,145 @@ lower_instructions_visitor::double_lrp(ir_expression *ir) this->progress = true; } +void +lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir) +{ + /* +* frtemp = frac(x); +* temp = sub(x, frtemp); +* result = temp + ((frtemp != 0.0) ? 1.0 : 0.0); +*/ + ir_instruction &i = *base_ir; + ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements); + ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements); + ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp", + ir_var_temporary); + + i.insert_before(frtemp); + i.insert_before(assign(frtemp, fract(ir->operands[0]))); + + ir->operation = ir_binop_add; + ir->operands[0] = sub(ir->operands[0]->clone(ir, NULL), frtemp); + ir->operands[1] = csel(nequal(frtemp, zero), one, zero->clone(ir, NULL)); + + this->progress = true; +} + +void +lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir) +{ + /* +* frtemp = frac(x); +* result = sub(x, frtemp); +*/ + ir->operation = ir_binop_sub; + ir->operands[1] = fract(ir->operands[0]->clone(ir, NULL)); + + this->progress = true; +} +void +lower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir) +{ + /* +* insane but works +* temp = x + 0.5; +* frtemp = frac(temp); +* t2 = sub(temp, frtemp); +* if (frac(x) == 0.5) +* result = frac(t2 * 0.5) == 0 ? t2 : t2 - 1; +* else +* result = t2; + +*/ + ir_instruction &i = *base_ir; + ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp", + ir_var_temporary); + ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp", + ir_var_temporary); + ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2", + ir_var_temporary); + ir_constant *p5 = new(ir) ir_constant(0.5, ir->operands[0]->type->vector_elements); + ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements); + ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements); + + i.insert_before(temp); + i.insert_before(assign(temp, add(ir->operands[0], p5))); + + i.insert_before(frtemp); + i.insert_before(assign(frtemp, fract(temp))); + + i.insert_before(t2); + i.insert_before(assign(t2, sub(temp, frtemp))); + + ir->operation = ir_triop_csel; + ir->op
Re: [Mesa-dev] Expected wide line rendering with clipping
On Sat, 2015-02-07 at 13:41 +0100, Erik Faye-Lund wrote: > On Fri, Feb 6, 2015 at 1:11 PM, Iago Toral wrote: > > Hi, > > > > Eduardo and I have been looking into a few dEQP test failures that deal > > with wide line rendering. There are a few of them that fail because of > > how clipping is implemented for this case. > > > > The problem in these cases seems to be that the hw renders the wide line > > as a parallelogram so that if an edge of the parallelogram is partially > > clipped, the other parallel edge will also be clipped at the same > > height/width so that the resulting wide line stays a parallelogram. The > > dEQP renders as if a wide line where a collection of 1px lines, so > > cliping one edge of the resulting line does not have implications for > > the other edge. > > > > This ASCII art illustrates the problem (* represent line pixels, | > > represents the viewport's rightmost edge): > > > > Expected by dEQP i965 rendering > > | | > > *| | > >**| | > > ***| | > > | | > > | | > > | | > > Is that drawing correct? I would expect it to look like this: > > | | > *| | > **|**|** >***| ***|* > | | > | | > | | Yes, it is correct. Your expectation makes sense if things happen as Roland described (first we clip the 1px line, then we expand), but the result in i965 looks more like what you would obtain if you do it the other way around. As I mentioned in my response to Roland, I think i965 can't render what you expect (at least directly), the reason being that it renders wide lines as parallelograms instead of multiple 1px lines. > From the drawing, it looks like the line is exactly 45 degrees, so I > believe this particular case is actually undefined, because of the > following spec wording: Right, that was just my drawing. The dEQP test case renders an x-major line. > "Line segment rasterization begins by characterizing the segment as > either x-major or y-major. x-major line segments have slope in the > closed interval [−1, 1]; all other line segments are y-major (slope is > determined by the segment’s endpoints)." > > A 45 degree line should have a slope of 1, and should in theory be > identified as an x-major line, giving the result dEQP expects (i965 is > in this case rendering the line as similar to y-major, except y-major > rendering should also generate fragments outside the viewport, as > pointed out in my drawing above). However, since we're exactly at the > break-off point, previous calculations are allowed to be off by some > small value, leaving the characterization undefined. > > So I think in this case, both results are allowed, and the test is > therefore somewhat bogus. If the line was tilted a tiny amount so it's > guaranteed to be characterized as an x-major, then it should be > correct again. Not having access to the tests myself, they might > already have done this, though. > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v3 00/29] add fp64 support to mesa and glsl compiler
I just sent out a few updated patches (latest can be seen in https://github.com/imirkin/mesa/commits/fp64-5) -- with all that in place the mysterious failure in one of the initializer tests is gone, and the constbuf locations being used correspond to the ones that we expect instead of having *tons* of empty space. On Sun, Feb 8, 2015 at 4:00 AM, Ilia Mirkin wrote: > This is v3 of the patchset which includes fixes based on Ian's > feedback with the exception of his comment on patch 23 (glsl: validate > output types for shader stages), i.e. whether it needs to exist at all > and isn't that being checked elsewhere. I am unfortunately not > familiar enough with the overall flow of things yet to work it > out. Perhaps someone else can take a look... Tapani? > > Chris also had a suggestion for standardizing the size_mul/dmul/etc > variable names... unfortunately various ones count different > things. Some are 4/8, others are 1/2. I left them as is for now. > > If I missed anything from the existing feedback, please let me > know. As before, this passes all current fp64 tests in piglit on nvc0 > with the exception of 4 initializer tests which Dave is looking at but > is fairly confident the error is in st/mesa somewhere. > > Patches also available at https://github.com/imirkin/mesa/commits/fp64-4 > > Dave Airlie (25): > glapi: add ARB_gpu_shader_fp64 (v2) > mesa: add ARB_gpu_shader_fp64 extension info (v2) > mesa: add mesa_type_is_double helper function (v2) > mesa: add double uniform support. (v5) > glsl: add ARB_gpu_shader_fp64 to the glsl extensions. (v2) > glsl: Add double builtin type generation > glsl: Uniform linking support for doubles > glsl/ir: Add builtin function support for doubles > glsl/ir: Add printing support for doubles > glsl/ir: Add cloning support for doubles > glsl/ir: Add builtin constant function support for doubles > glsl/ir: Add builder support for functions with double floats > glsl: Add support doubles in optimization passes > glsl: Add ubo lowering support for doubles > glsl/ast: Support double floats > glsl/parser: Support double floats > glsl/lexer: Support double floats > glsl: Support double inouts > glsl: Support double loop control > glsl: Linking support for doubles > glsl: add double support to lower_mat_op_to_vec > glsl: enable/disable certain lowering passes for doubles > glsl/lower_instructions: add double lowering passes > glsl: implement double builtin functions > glsl: lower double optional passes (v2) > > Ilia Mirkin (3): > glsl: Add double builtin type > glsl: add a lowering pass for frexp/ldexp with double arguments > glsl/tests: add DOUBLE/IMAGE types > > Tapani Pälli (1): > glsl: validate output types for shader stages > > src/glsl/ast.h | 2 + > src/glsl/ast_function.cpp | 66 +- > src/glsl/ast_to_hir.cpp| 78 ++- > src/glsl/builtin_functions.cpp | 751 > ++--- > src/glsl/builtin_type_macros.h | 16 + > src/glsl/builtin_types.cpp | 30 + > src/glsl/glcpp/glcpp-parse.y | 3 + > src/glsl/glsl_lexer.ll | 31 +- > src/glsl/glsl_parser.yy| 33 +- > src/glsl/glsl_parser_extras.cpp| 5 + > src/glsl/glsl_parser_extras.h | 7 + > src/glsl/glsl_types.cpp| 109 ++- > src/glsl/glsl_types.h | 20 +- > src/glsl/ir.cpp| 111 ++- > src/glsl/ir.h | 22 + > src/glsl/ir_builder.cpp| 23 + > src/glsl/ir_builder.h | 5 + > src/glsl/ir_clone.cpp | 1 + > src/glsl/ir_constant_expression.cpp| 247 ++- > src/glsl/ir_optimization.h | 2 + > src/glsl/ir_print_visitor.cpp | 11 + > src/glsl/ir_set_program_inouts.cpp | 28 +- > src/glsl/ir_validate.cpp | 72 +- > src/glsl/link_uniform_initializers.cpp | 8 +- > src/glsl/link_uniforms.cpp | 9 +- > src/glsl/link_varyings.cpp | 3 +- > src/glsl/loop_controls.cpp | 8 +- > src/glsl/lower_instructions.cpp| 520 +- > src/glsl/lower_mat_op_to_vec.cpp | 2 + > src/glsl/lower_ubo_reference.cpp | 18 +- > src/glsl/opt_algebraic.cpp | 26 +- > src/glsl/opt_constant_propagation.cpp | 3 + > src/glsl/opt_minmax.cpp| 13 + > src/glsl/standalone_scaffolding.cpp| 1 + > src/glsl/tests/uniform_initializer_utils.cpp | 12 +
Re: [Mesa-dev] Expected wide line rendering with clipping
Am 09.02.2015 um 09:53 schrieb Iago Toral: > On Fri, 2015-02-06 at 21:27 +0100, Roland Scheidegger wrote: >> Am 06.02.2015 um 13:11 schrieb Iago Toral: >>> Hi, >>> >>> Eduardo and I have been looking into a few dEQP test failures that deal >>> with wide line rendering. There are a few of them that fail because of >>> how clipping is implemented for this case. >>> >>> The problem in these cases seems to be that the hw renders the wide line >>> as a parallelogram so that if an edge of the parallelogram is partially >>> clipped, the other parallel edge will also be clipped at the same >>> height/width so that the resulting wide line stays a parallelogram. The >>> dEQP renders as if a wide line where a collection of 1px lines, so >>> cliping one edge of the resulting line does not have implications for >>> the other edge. >>> >>> This ASCII art illustrates the problem (* represent line pixels, | >>> represents the viewport's rightmost edge): >>> >>> Expected by dEQP i965 rendering >>> | | >>> *| | >>>**| | >>> ***| | >>> | | >>> | | >>> | | >>> >>> We can make the rendering result match dEQP's expectation by enabling >>> the GuardBand in the clip stage (GEN6_CLIP_GB_TEST). This is being >>> disabled by the driver for gen7 in this scenario because the underlying >>> framebuffer surface is larger than the viewport, and per the comment in >>> the code, in gen7 that would make it render beyond the viewport limits >>> in that surface, while it notes that gen8 hw fixes this. So I guess that >>> we do not want to do this in any case in gen7. >>> >>> Then, I have been reviewing the OpenGL specs to see if they clarify what >>> should happen when clipping wide lines and I think the spec does not >>> make a clear point, since when it talks about line clipping it does not >>> cover the case of wide lines specifically: >>> >>> "13.5. PRIMITIVE CLIPPING >>> ... >>> If part of the line segment lies in the volume and part lies outside, >>> then the line segment is clipped and new vertex coordinates are computed >>> for one or both vertices. A clipped line segment endpoint lies on both >>> the original line segment and the boundary of the clip volume. >>> ... >>> " >>> >>> The above description is clear for 1px lines, but where should the new >>> vertex be generated exactly for wide lines, where many points could be >>> at the boundary of the clip volume? Is any point valid? (that would mean >>> the dEQP test is bogus because there are multiple accepted renderings), >>> should the new vertex be exactly at the center of the line? (that would >>> make both deqp and intel rendering bogus). >>> >>> Then there is also this comment in "14.5.2.2 Wide Lines" inside the >>> non-AA line rasterization chapter: >>> >>> "Non-antialiased line segments of width other than one are rasterized by >>> off-setting them in the minor direction (for an x-major line, the minor >>> direction is y, and for a y-major line, the minor direction is x) and >>> replicating fragments in the minor direction (see figure 14.3). Let w be >>> the width rounded to the nearest integer (if w = 0, then it is as if w = >>> 1). If the line segment has endpoints given by (x 0 , y 0 ) and (x 1 , y >>> 1 ) in window coordinates, the segment with endpoints (x 0 , y 0 − (w − >>> 1)/2) and (x 1 , y 1 − (w − 1)/2) is rasterized, but instead of a single >>> fragment, a column of fragments of height w (a row of fragments of >>> length w for a y-major segment) is produced at each x (y for y-major) >>> location. The lowest fragment of this column is the fragment that would >>> be produced by rasterizing the segment of width 1 with the modified >>> coordinates." >>> >>> This "suggests" that wide line rendering should be equivalent to >>> rendering multiple lines of width=1 and if we actually did that then the >>> expected rendering result after clipping would be what dEQP expects. >>> >>> I think the whole thing isn't 100% clear. Does anyone have any thoughts >>> on what should be considered correct behavior in this case? >>> >>> Iago >>> >>> >> >> I am quite sure that i965 rendering is what (at least legacy GL) >> expects. This is because the wide-line expansion is defined to be >> happening at rasterization time, i.e. past clipping. Thus, at clipping >> time, no matter if a line is wide or not, it is still really a line, >> hence clipping will produce a single new endpoint at the viewport edge >> and in rasterization this is then what will be used as the endpoint. > > That makes sense, but in that case I think i965 rendering would still be > incorrect, the expected result would look something like this: > > | > | > **| >***| > | > | > | > > However, as far as I can tell, since i965 hardware renders wi
[Mesa-dev] [PATCH 3/3] dri/common: Fix returned value of __DRI2_RENDERER_PREFERRED_PROFILE
If the renderer supports the core profile the query returned incorrectly 0x8 as value, because it was using (1U << __DRI_API_OPENGL_CORE) for the returned value. The same happened with the compatibility profile. It returned 0x1 (1U << __DRI_API_OPENGL) instead of 0x2. Used defines for the incorrect behavior: dri_interface.h: #define __DRI_API_OPENGL 0 dri_interface.h: #define __DRI_API_OPENGL_CORE 3 Now we return GLX_CONTEXT_CORE_PROFILE_BIT_ARB (0x1) for a preferred core context profile and GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB (0x2) for a preferred compatibility context profile. Cc: "10.3 10.4 10.5" Signed-off-by: Andreas Boll --- I've noticed this wrong behavior while testing Adam's glxinfo changes [1] (glxinfo: Add support for GLX_MESA_query_renderer) For reproducing this bug and/or testing this and the previous patch just run glxinfo | grep "Preferred profile" Andreas. [1] http://cgit.freedesktop.org/mesa/demos/commit/?id=999b6950c644266bb871e79438751bdba2fa2a08 src/mesa/drivers/dri/common/utils.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c index ccdc971..08e0d79 100644 --- a/src/mesa/drivers/dri/common/utils.c +++ b/src/mesa/drivers/dri/common/utils.c @@ -522,7 +522,8 @@ driQueryRendererIntegerCommon(__DRIscreen *psp, int param, unsigned int *value) } case __DRI2_RENDERER_PREFERRED_PROFILE: value[0] = (psp->max_gl_core_version != 0) - ? (1U << __DRI_API_OPENGL_CORE) : (1U << __DRI_API_OPENGL); + ? GLX_CONTEXT_CORE_PROFILE_BIT_ARB + : GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB; return 0; case __DRI2_RENDERER_OPENGL_CORE_PROFILE_VERSION: value[0] = psp->max_gl_core_version / 10; -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] mesa: Redefine GLX_CONTEXT_{CORE|COMPATIBILITY}_PROFILE_BIT_ARB
GLX_MESA_query_renderer uses these values for its query GLX_RENDERER_PREFERRED_PROFILE_MESA. Since GLX header files may not be available everywhere they need to be used, redefine them here. This is needed for the next patch. Cc: "10.3 10.4 10.5" Signed-off-by: Andreas Boll --- I hope this is the right solution to use these defines in src/mesa/drivers/dri/common/utils.c since the above defines like GLX_DONT_CARE or GLX_NONE do the same. Andreas. src/mesa/main/glheader.h | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h index 7f7f9a3..3151f75 100644 --- a/src/mesa/main/glheader.h +++ b/src/mesa/main/glheader.h @@ -162,6 +162,12 @@ typedef void *GLeglImageOES; #define GLX_DONT_CARE 0x +/* GLX_MESA_query_renderer uses these values for its query + * GLX_RENDERER_PREFERRED_PROFILE_MESA. Since GLX header files may not be + * available everywhere they need to be used, redefine them here. + */ +#define GLX_CONTEXT_CORE_PROFILE_BIT_ARB 0x0001 +#define GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB 0x0002 #ifdef __cplusplus } -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] dri/common: Update comment about driQueryRendererIntegerCommon
Since 87d3ae0b45b6b6bb50b583dafa55eb109449a005 driQueryRendererIntegerCommon handles __DRI2_RENDERER_PREFFERED_PROFILE too. Signed-off-by: Andreas Boll --- src/mesa/drivers/dri/common/utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c index f2e63c0..ccdc971 100644 --- a/src/mesa/drivers/dri/common/utils.c +++ b/src/mesa/drivers/dri/common/utils.c @@ -485,6 +485,7 @@ driIndexConfigAttrib(const __DRIconfig *config, int index, * Currently only the following queries are supported by this function: * * - \c __DRI2_RENDERER_VERSION + * - \c __DRI2_RENDERER_PREFERRED_PROFILE * - \c __DRI2_RENDERER_OPENGL_CORE_PROFILE_VERSION * - \c __DRI2_RENDERER_OPENGL_COMPATIBLITY_PROFILE_VERSION * - \c __DRI2_RENDERER_ES_PROFILE_VERSION -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 08/32] i965/fs: Fix stack allocation of fs_inst and stop stealing src array provided on construction.
Matt Turner writes: > On Fri, Feb 6, 2015 at 6:42 AM, Francisco Jerez wrote: >> Using 'ralloc*(this, ...)' is wrong if the object has automatic >> storage or was allocated through any other means. Use normal dynamic >> memory instead. >> --- > > I don't see any places we were allocating an fs_inst via a mechanism > other than ralloc. Does one exist, or is this commit in preparation > for doing so? > The latter, you're right that right now all fs_inst allocations are done using ralloc. > I understand that in general "stealing" the source array might be > somewhat unexpected, but as far as I know we do this with > load_payload, where it seems pretty clear. Is this just an issue of > taste? I don't think it's just a matter of taste. It actually took me hours of debugging to find out what was corrupting random memory in my program. The cause turned out to be that I was passing a stack-allocated array as the src argument of LOAD_PAYLOAD() (which gives no indication of the argument having such particular memory semantics), the array is then silently mutated by fs_inst::init() and bound to fs_inst:src with no consideration of what the caller had in mind to do with it afterwards. Later on my original stack allocation goes out of scope and the sources of the instruction end up getting filled with garbage (and worst-case the stack could get corrupted if someone modifies the sources of the instruction later on). You might argue against stack-allocating the source array, but it's by far the most convenient (just see the number of lines of code saved in the other fs_inst constructors by allocating the sources in the stack, which allows you to use aggregate initializer syntax), cache-local and memory-efficient (as the array allocation is released as soon as it goes out of scope rather than letting it leak until the end of the compilation). For these reasons it's likely to happen again to someone else in the future, I already learnt the lesson the hard way. The other reason against using ralloc to allocate fs_insts is that it's ridiculously memory-inefficient. Each fs_inst allocation done using ralloc requires 80 bytes (96 bytes in debug mode) of extra space for the two header structures (one for the fs_inst itself and another one for the source array), that's almost as much as the fs_inst structure itself (104 bytes), and the whole thing leaks until the compilation terminates even if as it frequently happens the instruction is eliminated along the way by some optimization pass. pgpHS9K2ZL3jy.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 14/32] i965/fs: Fix register coalesce not to lose track of the second half of 16-wide moves.
Matt Turner writes: > On Fri, Feb 6, 2015 at 6:42 AM, Francisco Jerez wrote: >> Fixes rewrite by the register coalesce pass of references to >> individual halves of 16-wide coalesced registers. >> --- >> src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp | 8 ++-- >> 1 file changed, 6 insertions(+), 2 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp >> index 09f0fad..2a26a46 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp >> @@ -211,9 +211,13 @@ fs_visitor::register_coalesce() >> continue; >> } >> reg_to_offset[offset] = inst->dst.reg_offset; >> - if (inst->src[0].width == 16) >> -reg_to_offset[offset + 1] = inst->dst.reg_offset + 1; >> mov[offset] = inst; >> + >> + if (inst->exec_size * type_sz(inst->src[0].type) > REG_SIZE) { >> +reg_to_offset[offset + 1] = inst->dst.reg_offset + 1; >> +mov[offset + 1] = inst; >> + } >> + >> channels_remaining -= inst->regs_written; >>} >> >> -- >> 2.1.3 > > I can believe it. It would help me to understand if we had an example > of a sequence of instructions that this code didn't handle properly. The problem is in the "rewrite" phase of the register coalesce pass (roughly lines 264-283). It won't fix up instructions that reference some specific offset of the coalesced register if mov[i] is NULL for that offset, as is the case for the second half of a 16-wide move. For example: | ADD (16) vgrf0:f, vgrf0:f, 1.0:f | MOV (16) vgrf1:f, vgrf0:f | MOV (8) vgrf2:f, vgrf0+1:f { sechalf } will get incorrectly register-coalesced into: | ADD (16) vgrf1:f, vgrf1:f, 1.0:f | MOV (8) vgrf2:f, vgrf0+1:f { sechalf } pgp5i_CmC3EKd.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/6] main: Added entry point for glTransformFeedbackBufferBase
v2: Review from Laura Ekstrand - give more helpful error messages - factor the lookup code for the xfb and objBuf - replace some already-existing tabs with spaces - add comments to explain the cases where xfb == 0 or buffer == 0 - fix the condition for binding the transform buffer or not v3: Review from Laura Ekstrand - rename _mesa_lookup_bufferobj_err to _mesa_lookup_transform_feedback_bufferobj_err and make it static to avoid a future conflict - make _mesa_lookup_transform_feedback_object_err static v4: Review from Laura Ekstrand - add the pdf page number when quoting the spec - rename some of the symbols to follow the public/private conventions Signed-off-by: Martin Peres --- src/mapi/glapi/gen/ARB_direct_state_access.xml | 6 + src/mesa/main/bufferobj.c | 9 +- src/mesa/main/objectlabel.c| 2 +- src/mesa/main/tests/dispatch_sanity.cpp| 1 + src/mesa/main/transformfeedback.c | 146 +++-- src/mesa/main/transformfeedback.h | 13 ++- src/mesa/vbo/vbo_exec_array.c | 8 +- 7 files changed, 139 insertions(+), 46 deletions(-) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index 15b00c2..35d6906 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -14,6 +14,12 @@ + + + + + + diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index 0c1ce98..86532ea 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -3546,8 +3546,9 @@ _mesa_BindBufferRange(GLenum target, GLuint index, switch (target) { case GL_TRANSFORM_FEEDBACK_BUFFER: - _mesa_bind_buffer_range_transform_feedback(ctx, index, bufObj, -offset, size); + _mesa_bind_buffer_range_transform_feedback(ctx, + ctx->TransformFeedback.CurrentObject, + index, bufObj, offset, size); return; case GL_UNIFORM_BUFFER: bind_buffer_range_uniform_buffer(ctx, index, bufObj, offset, size); @@ -3611,7 +3612,9 @@ _mesa_BindBufferBase(GLenum target, GLuint index, GLuint buffer) switch (target) { case GL_TRANSFORM_FEEDBACK_BUFFER: - _mesa_bind_buffer_base_transform_feedback(ctx, index, bufObj); + _mesa_bind_buffer_base_transform_feedback(ctx, + ctx->TransformFeedback.CurrentObject, +index, bufObj, false); return; case GL_UNIFORM_BUFFER: bind_buffer_base_uniform_buffer(ctx, index, bufObj); diff --git a/src/mesa/main/objectlabel.c b/src/mesa/main/objectlabel.c index 78df96b..19a7e59 100644 --- a/src/mesa/main/objectlabel.c +++ b/src/mesa/main/objectlabel.c @@ -170,7 +170,7 @@ get_label_pointer(struct gl_context *ctx, GLenum identifier, GLuint name, case GL_TRANSFORM_FEEDBACK: { struct gl_transform_feedback_object *tfo = -_mesa_lookup_transform_feedback_object(ctx, name); +lookup_transform_feedback_object(ctx, name); if (tfo) labelPtr = &tfo->Label; } diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index bf320bf..183755f 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -956,6 +956,7 @@ const struct function gl_core_functions_possible[] = { /* GL_ARB_direct_state_access */ { "glCreateTransformFeedbacks", 45, -1 }, + { "glTransformFeedbackBufferBase", 45, -1 }, { "glCreateTextures", 45, -1 }, { "glTextureStorage1D", 45, -1 }, { "glTextureStorage2D", 45, -1 }, diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c index 10bb6a1..d932943 100644 --- a/src/mesa/main/transformfeedback.c +++ b/src/mesa/main/transformfeedback.c @@ -514,22 +514,24 @@ _mesa_EndTransformFeedback(void) * Helper used by BindBufferRange() and BindBufferBase(). */ static void -bind_buffer_range(struct gl_context *ctx, GLuint index, +bind_buffer_range(struct gl_context *ctx, + struct gl_transform_feedback_object *obj, + GLuint index, struct gl_buffer_object *bufObj, - GLintptr offset, GLsizeiptr size) + GLintptr offset, GLsizeiptr size, + bool dsa) { - struct gl_transform_feedback_object *obj = - ctx->TransformFeedback.CurrentObject; - /* Note: no need to FLUSH_VERTICES or flag NewTransformFeedback, because * transform feedback buffers can't be changed while transform feedback is * active. */ - /* The general binding point */ - _mesa_reference_buffer_object(ctx, - &ctx->Tran
[Mesa-dev] [PATCH 1/6] main: Added entry point for glCreateTransformFeedbacks
v2: Review from Laura Ekstrand - generate the name of the gl method once - shorten some lines to stay in the 78 chars limit v3: Review from Fredrik Höglund - rename gl_mthd_name to func - set EverBound in _mesa_create_transform_feedbacks in the dsa case v4: - rename _mesa_create_transform_feedbacks to create_transform_feedbacks and make it static Signed-off-by: Martin Peres --- src/mapi/glapi/gen/ARB_direct_state_access.xml | 7 +++ src/mesa/main/tests/dispatch_sanity.cpp| 1 + src/mesa/main/transformfeedback.c | 67 -- src/mesa/main/transformfeedback.h | 3 ++ 4 files changed, 63 insertions(+), 15 deletions(-) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index 2fe1638..15b00c2 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -7,6 +7,13 @@ + + + + + + + diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 1f1a3a8..bf320bf 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -955,6 +955,7 @@ const struct function gl_core_functions_possible[] = { { "glClipControl", 45, -1 }, /* GL_ARB_direct_state_access */ + { "glCreateTransformFeedbacks", 45, -1 }, { "glCreateTextures", 45, -1 }, { "glTextureStorage1D", 45, -1 }, { "glTextureStorage2D", 45, -1 }, diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c index a737463..10bb6a1 100644 --- a/src/mesa/main/transformfeedback.c +++ b/src/mesa/main/transformfeedback.c @@ -825,25 +825,24 @@ _mesa_lookup_transform_feedback_object(struct gl_context *ctx, GLuint name) _mesa_HashLookup(ctx->TransformFeedback.Objects, name); } - -/** - * Create new transform feedback objects. Transform feedback objects - * encapsulate the state related to transform feedback to allow quickly - * switching state (and drawing the results, below). - * Part of GL_ARB_transform_feedback2. - */ -void GLAPIENTRY -_mesa_GenTransformFeedbacks(GLsizei n, GLuint *names) +static void +create_transform_feedbacks(struct gl_context *ctx, GLsizei n, GLuint *ids, + bool dsa) { GLuint first; - GET_CURRENT_CONTEXT(ctx); + const char* func; + + if (dsa) + func = "glCreateTransformFeedbacks"; + else + func = "glGenTransformFeedbacks"; if (n < 0) { - _mesa_error(ctx, GL_INVALID_VALUE, "glGenTransformFeedbacks(n < 0)"); + _mesa_error(ctx, GL_INVALID_VALUE, "%s(n < 0)", func); return; } - if (!names) + if (!ids) return; /* we don't need contiguous IDs, but this might be faster */ @@ -854,18 +853,56 @@ _mesa_GenTransformFeedbacks(GLsizei n, GLuint *names) struct gl_transform_feedback_object *obj = ctx->Driver.NewTransformFeedback(ctx, first + i); if (!obj) { -_mesa_error(ctx, GL_OUT_OF_MEMORY, "glGenTransformFeedbacks"); +_mesa_error(ctx, GL_OUT_OF_MEMORY, "%s", func); return; } - names[i] = first + i; + ids[i] = first + i; _mesa_HashInsert(ctx->TransformFeedback.Objects, first + i, obj); + if (dsa) { +/* this is normally done at bind time in the non-dsa case */ +obj->EverBound = GL_TRUE; + } } } else { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGenTransformFeedbacks"); + _mesa_error(ctx, GL_OUT_OF_MEMORY, "%s", func); } } +/** + * Create new transform feedback objects. Transform feedback objects + * encapsulate the state related to transform feedback to allow quickly + * switching state (and drawing the results, below). + * Part of GL_ARB_transform_feedback2. + */ +void GLAPIENTRY +_mesa_GenTransformFeedbacks(GLsizei n, GLuint *names) +{ + GET_CURRENT_CONTEXT(ctx); + + /* GenTransformFeedbacks should just reserve the object names that a +* subsequent call to BindTransformFeedback should actively create. For +* the sake of simplicity, we reserve the names and create the objects +* straight away. +*/ + + create_transform_feedbacks(ctx, n, names, false); +} + +/** + * Create new transform feedback objects. Transform feedback objects + * encapsulate the state related to transform feedback to allow quickly + * switching state (and drawing the results, below). + * Part of GL_ARB_direct_state_access. + */ +void GLAPIENTRY +_mesa_CreateTransformFeedbacks(GLsizei n, GLuint *names) +{ + GET_CURRENT_CONTEXT(ctx); + + create_transform_feedbacks(ctx, n, names, true); +} + /** * Is the given ID a transform feedback object? diff --git a/src/mesa/main/transformfeedback.h b/src/mesa/main/transformfeedback.h index 87f4080..9de1fef 100644 --- a/src/mesa/main/transformfeedback.h +++ b/src/mesa/main/transformfeedback.h @@ -102,6
[Mesa-dev] [PATCH 5/6] main: Added entry point for glGetTransformFeedbacki_v
v2: Review from Laura Ekstrand - use the transform feedback object lookup wrapper v3: - use the new name of _mesa_lookup_transform_feedback_object_err Signed-off-by: Martin Peres --- src/mapi/glapi/gen/ARB_direct_state_access.xml | 7 +++ src/mesa/main/tests/dispatch_sanity.cpp| 1 + src/mesa/main/transformfeedback.c | 29 ++ src/mesa/main/transformfeedback.h | 4 4 files changed, 41 insertions(+) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index 1ac4386..935e088 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -34,6 +34,13 @@ + + + + + + + diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index ba36d28..d5535e9 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -959,6 +959,7 @@ const struct function gl_core_functions_possible[] = { { "glTransformFeedbackBufferBase", 45, -1 }, { "glTransformFeedbackBufferRange", 45, -1 }, { "glGetTransformFeedbackiv", 45, -1 }, + { "glGetTransformFeedbacki_v", 45, -1 }, { "glCreateTextures", 45, -1 }, { "glTextureStorage1D", 45, -1 }, { "glTextureStorage2D", 45, -1 }, diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c index 9c9cba2..ce9a6d4 100644 --- a/src/mesa/main/transformfeedback.c +++ b/src/mesa/main/transformfeedback.c @@ -1228,3 +1228,32 @@ _mesa_GetTransformFeedbackiv(GLuint xfb, GLenum pname, GLint *param) "glGetTransformFeedbackiv(pname=%i)", pname); } } + +extern void GLAPIENTRY +_mesa_GetTransformFeedbacki_v(GLuint xfb, GLenum pname, GLuint index, + GLint *param) +{ + struct gl_transform_feedback_object *obj; + GET_CURRENT_CONTEXT(ctx); + + obj = lookup_transform_feedback_object_err(ctx, xfb, + "glGetTransformFeedbacki_v"); + if(!obj) { + return; + } + + if (index >= ctx->Const.MaxTransformFeedbackBuffers) { + _mesa_error(ctx, GL_INVALID_VALUE, + "glGetTransformFeedbacki_v(index=%i)", index); + return; + } + + switch(pname) { + case GL_TRANSFORM_FEEDBACK_BUFFER_BINDING: + *param = obj->BufferNames[index]; + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, + "glGetTransformFeedbacki_v(pname=%i)", pname); + } +} diff --git a/src/mesa/main/transformfeedback.h b/src/mesa/main/transformfeedback.h index ba00636..9936c68 100644 --- a/src/mesa/main/transformfeedback.h +++ b/src/mesa/main/transformfeedback.h @@ -158,4 +158,8 @@ _mesa_TransformFeedbackBufferRange(GLuint xfb, GLuint index, GLuint buffer, extern void GLAPIENTRY _mesa_GetTransformFeedbackiv(GLuint xfb, GLenum pname, GLint *param); +extern void GLAPIENTRY +_mesa_GetTransformFeedbacki_v(GLuint xfb, GLenum pname, GLuint index, + GLint *param); + #endif /* TRANSFORM_FEEDBACK_H */ -- 2.2.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/6] main: Added entry point for glGetTransformFeedbacki64_v
v2: Review from Laura Ekstrand - use the transform feedback object lookup wrapper v3: - use the new name of _mesa_lookup_transform_feedback_object_err Signed-off-by: Martin Peres --- src/mapi/glapi/gen/ARB_direct_state_access.xml | 7 ++ src/mesa/main/tests/dispatch_sanity.cpp| 1 + src/mesa/main/transformfeedback.c | 32 ++ src/mesa/main/transformfeedback.h | 4 4 files changed, 44 insertions(+) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index 935e088..340dbba 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -41,6 +41,13 @@ + + + + + + + diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index d5535e9..ad5da83 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -960,6 +960,7 @@ const struct function gl_core_functions_possible[] = { { "glTransformFeedbackBufferRange", 45, -1 }, { "glGetTransformFeedbackiv", 45, -1 }, { "glGetTransformFeedbacki_v", 45, -1 }, + { "glGetTransformFeedbacki64_v", 45, -1 }, { "glCreateTextures", 45, -1 }, { "glTextureStorage1D", 45, -1 }, { "glTextureStorage2D", 45, -1 }, diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c index ce9a6d4..0b6d04b 100644 --- a/src/mesa/main/transformfeedback.c +++ b/src/mesa/main/transformfeedback.c @@ -1257,3 +1257,35 @@ _mesa_GetTransformFeedbacki_v(GLuint xfb, GLenum pname, GLuint index, "glGetTransformFeedbacki_v(pname=%i)", pname); } } + +extern void GLAPIENTRY +_mesa_GetTransformFeedbacki64_v(GLuint xfb, GLenum pname, GLuint index, + GLint64 *param) +{ + struct gl_transform_feedback_object *obj; + GET_CURRENT_CONTEXT(ctx); + + obj = lookup_transform_feedback_object_err(ctx, xfb, + "glGetTransformFeedbacki64_v"); + if(!obj) { + return; + } + + if (index >= ctx->Const.MaxTransformFeedbackBuffers) { + _mesa_error(ctx, GL_INVALID_VALUE, + "glGetTransformFeedbacki64_v(index=%i)", index); + return; + } + + switch(pname) { + case GL_TRANSFORM_FEEDBACK_BUFFER_START: + *param = obj->Offset[index]; + break; + case GL_TRANSFORM_FEEDBACK_BUFFER_SIZE: + *param = obj->RequestedSize[index]; + break; + default: + _mesa_error(ctx, GL_INVALID_ENUM, + "glGetTransformFeedbacki64_v(pname=%i)", pname); + } +} diff --git a/src/mesa/main/transformfeedback.h b/src/mesa/main/transformfeedback.h index 9936c68..3d21779 100644 --- a/src/mesa/main/transformfeedback.h +++ b/src/mesa/main/transformfeedback.h @@ -162,4 +162,8 @@ extern void GLAPIENTRY _mesa_GetTransformFeedbacki_v(GLuint xfb, GLenum pname, GLuint index, GLint *param); +extern void GLAPIENTRY +_mesa_GetTransformFeedbacki64_v(GLuint xfb, GLenum pname, GLuint index, + GLint64 *param); + #endif /* TRANSFORM_FEEDBACK_H */ -- 2.2.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/6] Add 6 transform-feedback-related entry points for direct_state_access v2
The updated version of my series. I think I answered everyone's comments on the previous series. The corresponding piglit tests have been submited to the piglit ML too. No regression spotted by piglit when applying this series. Warning: Those tests will only work when forcing mesa to expose the DSA extension and when using enable_ppgtt=2. Only gen7 has been tested. Martin Peres (6): main: Added entry point for glCreateTransformFeedbacks main: Added entry point for glTransformFeedbackBufferBase main: Added entry point for glTransformFeedbackBufferRange main: Added entry point for glGetTransformFeedbackiv main: Added entry point for glGetTransformFeedbacki_v main: Added entry point for glGetTransformFeedbacki64_v src/mapi/glapi/gen/ARB_direct_state_access.xml | 41 +++ src/mesa/main/bufferobj.c | 10 +- src/mesa/main/objectlabel.c| 2 +- src/mesa/main/tests/dispatch_sanity.cpp| 6 + src/mesa/main/transformfeedback.c | 375 + src/mesa/main/transformfeedback.h | 33 ++- src/mesa/vbo/vbo_exec_array.c | 8 +- 7 files changed, 407 insertions(+), 68 deletions(-) -- 2.2.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/6] main: Added entry point for glGetTransformFeedbackiv
v2: Review from Laura Ekstrand - use the transform feedback object lookup wrapper Signed-off-by: Martin Peres --- src/mapi/glapi/gen/ARB_direct_state_access.xml | 6 ++ src/mesa/main/tests/dispatch_sanity.cpp| 1 + src/mesa/main/transformfeedback.c | 25 + src/mesa/main/transformfeedback.h | 3 +++ 4 files changed, 35 insertions(+) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index b3c090f..1ac4386 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -28,6 +28,12 @@ + + + + + + diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 87f7d6f..ba36d28 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -958,6 +958,7 @@ const struct function gl_core_functions_possible[] = { { "glCreateTransformFeedbacks", 45, -1 }, { "glTransformFeedbackBufferBase", 45, -1 }, { "glTransformFeedbackBufferRange", 45, -1 }, + { "glGetTransformFeedbackiv", 45, -1 }, { "glCreateTextures", 45, -1 }, { "glTextureStorage1D", 45, -1 }, { "glTextureStorage2D", 45, -1 }, diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c index 2dded21..9c9cba2 100644 --- a/src/mesa/main/transformfeedback.c +++ b/src/mesa/main/transformfeedback.c @@ -1203,3 +1203,28 @@ _mesa_ResumeTransformFeedback(void) assert(ctx->Driver.ResumeTransformFeedback); ctx->Driver.ResumeTransformFeedback(ctx, obj); } + +extern void GLAPIENTRY +_mesa_GetTransformFeedbackiv(GLuint xfb, GLenum pname, GLint *param) +{ +struct gl_transform_feedback_object *obj; +GET_CURRENT_CONTEXT(ctx); + +obj = lookup_transform_feedback_object_err(ctx, xfb, + "glGetTransformFeedbackiv"); +if(!obj) { + return; +} + +switch(pname) { +case GL_TRANSFORM_FEEDBACK_PAUSED: + *param = obj->Paused; + break; +case GL_TRANSFORM_FEEDBACK_ACTIVE: + *param = obj->Active; + break; +default: + _mesa_error(ctx, GL_INVALID_ENUM, + "glGetTransformFeedbackiv(pname=%i)", pname); +} +} diff --git a/src/mesa/main/transformfeedback.h b/src/mesa/main/transformfeedback.h index 6cad766..ba00636 100644 --- a/src/mesa/main/transformfeedback.h +++ b/src/mesa/main/transformfeedback.h @@ -155,4 +155,7 @@ extern void GLAPIENTRY _mesa_TransformFeedbackBufferRange(GLuint xfb, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size); +extern void GLAPIENTRY +_mesa_GetTransformFeedbackiv(GLuint xfb, GLenum pname, GLint *param); + #endif /* TRANSFORM_FEEDBACK_H */ -- 2.2.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/6] main: Added entry point for glTransformFeedbackBufferRange
v2: review from Laura Ekstrand - use the refactored code to lookup the objects - improve some error messages - factor out the gl method name computation - better handle the spec differences between the DSA and non-DSA cases - quote the spec a little more v3: review from Laura Ekstrand - use the new name of _mesa_lookup_bufferobj_err - swap the comments around the offset and size checks v4: review from Laura Ekstrand - add more spec quotes - properly fix the comments around the offset and size checks Signed-off-by: Martin Peres --- src/mapi/glapi/gen/ARB_direct_state_access.xml | 8 +++ src/mesa/main/bufferobj.c | 3 +- src/mesa/main/tests/dispatch_sanity.cpp| 1 + src/mesa/main/transformfeedback.c | 98 +- src/mesa/main/transformfeedback.h | 6 +- 5 files changed, 97 insertions(+), 19 deletions(-) diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml b/src/mapi/glapi/gen/ARB_direct_state_access.xml index 35d6906..b3c090f 100644 --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml @@ -20,6 +20,14 @@ + + + + + + + + diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index 86532ea..7558e17 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -3548,7 +3548,8 @@ _mesa_BindBufferRange(GLenum target, GLuint index, case GL_TRANSFORM_FEEDBACK_BUFFER: _mesa_bind_buffer_range_transform_feedback(ctx, ctx->TransformFeedback.CurrentObject, - index, bufObj, offset, size); + index, bufObj, offset, size, + false); return; case GL_UNIFORM_BUFFER: bind_buffer_range_uniform_buffer(ctx, index, bufObj, offset, size); diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 183755f..87f7d6f 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -957,6 +957,7 @@ const struct function gl_core_functions_possible[] = { /* GL_ARB_direct_state_access */ { "glCreateTransformFeedbacks", 45, -1 }, { "glTransformFeedbackBufferBase", 45, -1 }, + { "glTransformFeedbackBufferRange", 45, -1 }, { "glCreateTextures", 45, -1 }, { "glTextureStorage1D", 45, -1 }, { "glTextureStorage2D", 45, -1 }, diff --git a/src/mesa/main/transformfeedback.c b/src/mesa/main/transformfeedback.c index d932943..2dded21 100644 --- a/src/mesa/main/transformfeedback.c +++ b/src/mesa/main/transformfeedback.c @@ -541,7 +541,8 @@ bind_buffer_range(struct gl_context *ctx, /** * Specify a buffer object to receive transform feedback results. Plus, * specify the starting offset to place the results, and max size. - * Called from the glBindBufferRange() function. + * Called from the glBindBufferRange() and glTransformFeedbackBufferRange + * functions. */ void _mesa_bind_buffer_range_transform_feedback(struct gl_context *ctx, @@ -549,35 +550,74 @@ _mesa_bind_buffer_range_transform_feedback(struct gl_context *ctx, GLuint index, struct gl_buffer_object *bufObj, GLintptr offset, - GLsizeiptr size) + GLsizeiptr size, + bool dsa) { + const char *gl_methd_name; + if (dsa) + gl_methd_name = "glTransformFeedbackBufferRange"; + else + gl_methd_name = "glBindBufferRange"; + + if (obj->Active) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glBindBufferRange(transform feedback active)"); + _mesa_error(ctx, GL_INVALID_OPERATION, "%s(transform feedback active)", + gl_methd_name); return; } if (index >= ctx->Const.MaxTransformFeedbackBuffers) { - _mesa_error(ctx, GL_INVALID_VALUE, "glBindBufferRange(index=%d " - "out of bounds)", index); + /* OpenGL 4.5 core profile, 6.1, pdf page 82: An INVALID_VALUE error is + * generated if index is greater than or equal to the number of binding + * points for transform feedback, as described in section 6.7.1. + */ + _mesa_error(ctx, GL_INVALID_VALUE, "%s(index=%d out of bounds)", + gl_methd_name, index); return; } if (size & 0x3) { - /* must a multiple of four */ - _mesa_error(ctx, GL_INVALID_VALUE, "glBindBufferRange(size=%d)", - (int) size); + /* OpenGL 4.5 core profile, 6.7, pdf page 103: multiple of 4 */ + _mesa_error(ctx, GL_INVALID_VALUE, "%s(size=%d must be a multiple of " + "four)", gl_me
Re: [Mesa-dev] [PATCH 18/32] i965/vec4: Don't attempt to reduce swizzles of send from GRF instructions.
Matt Turner writes: > On Fri, Feb 6, 2015 at 6:42 AM, Francisco Jerez wrote: >> --- >> src/mesa/drivers/dri/i965/brw_vec4.cpp | 3 ++- >> 1 file changed, 2 insertions(+), 1 deletion(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4.cpp >> index c3f68e6..aaa4873 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp >> @@ -437,7 +437,8 @@ vec4_visitor::opt_reduce_swizzle() >> bool progress = false; >> >> foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) { >> - if (inst->dst.file == BAD_FILE || inst->dst.file == HW_REG) >> + if (inst->dst.file == BAD_FILE || inst->dst.file == HW_REG || >> + inst->is_send_from_grf()) >> continue; >> >>int swizzle[4]; >> -- >> 2.1.3 > > Like the last patch, I totally believe this is a problem, I just want > a little more explanation for how it was uncovered. The problem is that it's not safe for opt_reduce_swizzle() to assume that there is a one-to-one correspondence between vec4 channels of its sources and its destination for send instructions. For example, the message may include a header which has an effect on all channels. For example: | mov vgrf31.0:UD, 0D | mov vgrf31.0.w:UD, 17D | typed_surface_read vgrf33.0.x:UD, vgrf31.xyzw:UD, vgrf32.:UD, 1U will get incorrectly reduced to: | mov vgrf31.0:UD, 0D | mov vgrf31.0.w:UD, 17D | typed_surface_read vgrf33.0.x:UD, vgrf31.:UD, vgrf32.:UD, 1U now subsequent optimization passes will assume that only the x component of vgrf31 is used and will simplify the above code to: | mov vgrf31.0.x:UD, 0D | typed_surface_read vgrf33.0.x:UD, vgrf31.:UD, vgrf32.:UD, 1U pgpbayODW9xsk.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 19/32] i965/vec4: Pass dst register to the vec4_instruction constructor.
Matt Turner writes: > On Fri, Feb 6, 2015 at 6:42 AM, Francisco Jerez wrote: >> So regs_written gets initialized with a sensible value. >> --- >> src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 11 +-- >> 1 file changed, 5 insertions(+), 6 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp >> index babddee..514de11 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp >> @@ -2384,12 +2384,11 @@ vec4_visitor::visit(ir_call *ir) >> src_reg >> vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg >> sampler) >> { >> - vec4_instruction *inst = new(mem_ctx) >> vec4_instruction(SHADER_OPCODE_TXF_MCS); >> + vec4_instruction *inst = >> + new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS, >> +dst_reg(this, glsl_type::uvec4_type)); >> inst->base_mrf = 2; >> inst->mlen = 1; >> - inst->dst = dst_reg(this, glsl_type::uvec4_type); >> - inst->dst.writemask = WRITEMASK_XYZW; >> - >> inst->src[1] = sampler; >> >> /* parameters are: u, v, r, lod; lod will always be zero due to api >> restrictions */ >> @@ -2562,7 +2561,8 @@ vec4_visitor::visit(ir_texture *ir) >>unreachable("Unrecognized tex op"); >> } >> >> - vec4_instruction *inst = new(mem_ctx) vec4_instruction(opcode); >> + vec4_instruction *inst = new(mem_ctx) vec4_instruction( >> + opcode, dst_reg(this, ir->type)); >> >> if (ir->offset != NULL && !has_nonconstant_offset) { >>inst->offset = >> @@ -2587,7 +2587,6 @@ vec4_visitor::visit(ir_texture *ir) >>is_high_sampler(brw, sampler_reg); >> inst->base_mrf = 2; >> inst->mlen = inst->header_present + 1; /* always at least one */ >> - inst->dst = dst_reg(this, ir->type); >> inst->dst.writemask = WRITEMASK_XYZW; > > I think you probably want to delete this line too? > > If so, > > Reviewed-by: Matt Turner Thanks, fixed it locally. pgpJz0aNXbBm_.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 21/32] i965/vec4: Fix the scheduler to take into account reads and writes of multiple registers.
Matt Turner writes: > On Fri, Feb 6, 2015 at 6:43 AM, Francisco Jerez wrote: >> --- >> src/mesa/drivers/dri/i965/brw_ir_vec4.h | 1 + >> src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 15 ++- >> src/mesa/drivers/dri/i965/brw_vec4.cpp | 17 >> + >> 3 files changed, 28 insertions(+), 5 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h >> b/src/mesa/drivers/dri/i965/brw_ir_vec4.h >> index f11a2d2..941086f 100644 >> --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h >> +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h >> @@ -171,6 +171,7 @@ public: >> unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header >> */ >> >> bool is_send_from_grf(); >> + unsigned regs_read(unsigned arg) const; >> bool can_reswizzle(int dst_writemask, int swizzle, int swizzle_mask); >> void reswizzle(int dst_writemask, int swizzle); >> bool can_do_source_mods(struct brw_context *brw); >> diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp >> b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp >> index 88feeca..2b22b2c 100644 >> --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp >> @@ -1063,7 +1063,8 @@ vec4_instruction_scheduler::calculate_deps() >>/* read-after-write deps. */ >>for (int i = 0; i < 3; i++) { >> if (inst->src[i].file == GRF) { >> -add_dep(last_grf_write[inst->src[i].reg], n); >> +for (unsigned j = 0; j < inst->regs_read(i); ++j) >> + add_dep(last_grf_write[inst->src[i].reg + j], n); >> } else if (inst->src[i].file == HW_REG && >> (inst->src[i].fixed_hw_reg.file == >> BRW_GENERAL_REGISTER_FILE)) { >> @@ -1103,8 +1104,10 @@ vec4_instruction_scheduler::calculate_deps() >> >>/* write-after-write deps. */ >>if (inst->dst.file == GRF) { >> - add_dep(last_grf_write[inst->dst.reg], n); >> - last_grf_write[inst->dst.reg] = n; >> + for (unsigned j = 0; j < inst->regs_written; ++j) { >> +add_dep(last_grf_write[inst->dst.reg + j], n); >> +last_grf_write[inst->dst.reg + j] = n; >> + } >>} else if (inst->dst.file == MRF) { >> add_dep(last_mrf_write[inst->dst.reg], n); >> last_mrf_write[inst->dst.reg] = n; >> @@ -1156,7 +1159,8 @@ vec4_instruction_scheduler::calculate_deps() >>/* write-after-read deps. */ >>for (int i = 0; i < 3; i++) { >> if (inst->src[i].file == GRF) { >> -add_dep(n, last_grf_write[inst->src[i].reg]); >> +for (unsigned j = 0; j < inst->regs_read(i); ++j) >> + add_dep(n, last_grf_write[inst->src[i].reg + j]); >> } else if (inst->src[i].file == HW_REG && >> (inst->src[i].fixed_hw_reg.file == >> BRW_GENERAL_REGISTER_FILE)) { >> @@ -1194,7 +1198,8 @@ vec4_instruction_scheduler::calculate_deps() >> * can mark this as WAR dependency. >> */ >>if (inst->dst.file == GRF) { >> - last_grf_write[inst->dst.reg] = n; >> + for (unsigned j = 0; j < inst->regs_written; ++j) >> +last_grf_write[inst->dst.reg + j] = n; >>} else if (inst->dst.file == MRF) { >> last_mrf_write[inst->dst.reg] = n; >>} else if (inst->dst.file == HW_REG && >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4.cpp >> index a4fd136..b2f3151 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp >> @@ -262,6 +262,23 @@ vec4_instruction::is_send_from_grf() >> } >> } >> >> +unsigned >> +vec4_instruction::regs_read(unsigned arg) const >> +{ >> + switch (opcode) { >> + case SHADER_OPCODE_SHADER_TIME_ADD: >> + return (arg == 0 ? mlen : >> + src[arg].file != BAD_FILE ? 1 : 0); >> + >> + case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: >> + return (arg == 1 ? mlen : >> + src[arg].file != BAD_FILE ? 1 : 0); >> + >> + default: >> + return (src[arg].file != BAD_FILE ? 1 : 0); >> + } >> +} > > Better to just do > >if (src[arg].file == BAD_FILE) > return 0; > > before the switch statement to avoid the nested ternary, or do you > actually need to test arg == 0/1 before checking .file in those two > cases? Yeah, I've fixed that locally, thanks. pgpWeqOJzKjir.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 01/14] i965: Don't tile 1D miptrees.
I made a similar patch in my local tree because it will be necessary for Skylake which doesn't support tiling for 1D textures. I made a little test to time rendering a large (4096) 1D texture here: https://github.com/bpeel/glthing/tree/time-1d-texture It gives about an 11% increase in FPS with this patch. It seems like a good idea to me to make it check the height rather than the texture dimensions as Kenneth says. Regards, - Neil Kenneth Graunke writes: > On Saturday, February 07, 2015 02:46:31 AM Francisco Jerez wrote: >> Kenneth Graunke writes: >> >> > On Friday, February 06, 2015 07:23:15 PM Francisco Jerez wrote: >> >> It doesn't really improve locality of texture fetches, quite the >> >> opposite it's a waste of memory bandwidth and space due to tile >> >> alignment. >> >> --- >> >> src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 4 >> >> 1 file changed, 4 insertions(+) >> >> >> >> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c >> >> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c >> >> index 64752dd..311b204 100644 >> >> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c >> >> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c >> >> @@ -488,6 +488,10 @@ intel_miptree_choose_tiling(struct brw_context *brw, >> >> base_format == GL_DEPTH_STENCIL_EXT) >> >>return I915_TILING_Y; >> >> >> >> + if (mt->target == GL_TEXTURE_1D || >> >> + mt->target == GL_TEXTURE_1D_ARRAY) >> >> + return I915_TILING_NONE; >> >> + >> >> int minimum_pitch = mt->total_width * mt->cpp; >> >> >> >> /* If the width is much smaller than a tile, don't bother tiling. */ >> >> >> > >> > What do you think about checking mt->logical_height0 == 1 instead? >> > GLES doesn't have 1D textures, but people might use a 2D texture with >> > height 1, and I think the same logic applies. >> > >> > I've also been thinking of trying to make core Mesa hide 1D textures and >> > simply turn them into 2D textures with height = 1, so drivers don't have >> > to deal with them. >> > >> > Either way, this seems like a good idea to me. >> >> I guess that would work because the layer count of 1D array textures >> ends up in logical_depth0 rather than logical_height0? Sounds good to >> me, but it would also have the side effect of not tiling "degenerate" 3D >> textures with height=1 but depth>1, I guess we would still like to tile >> those? How about (mt->logical_height0 == 1 && (mt->logical_depth0 == 1 || >> mt->target != GL_TEXTURE_3D))? >> >> Thanks Ken! > > I suppose that's fine, though frankly, trying to optimize a texture with > height 1 at all seems pretty unnecessary to me. I've never seen a 3D > texture with height 1 in the wild... > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev pgpPZwNTC8EO1.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCHv2 32/32] i965: Don't compact instructions with unmapped bits.
Some instruction bits don't have a mapping defined to any compacted instruction field. If they're ever set and we end up compacting the instruction they will be forced to zero. Avoid using compaction in such cases. v2: Align multiple lines of an expression to the same column. Change conditional compaction of 3-source instructions to an assertion. (Matt) --- src/mesa/drivers/dri/i965/brw_eu_compact.c | 46 ++ 1 file changed, 46 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index 8e33bcb..f80bcc1 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -843,10 +843,53 @@ set_3src_source_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst * } static bool +has_unmapped_bits(struct brw_context *brw, brw_inst *src) +{ + /* Check for instruction bits that don't map to any of the fields of the +* compacted instruction. The instruction cannot be compacted if any of +* them are set. They overlap with: +* - NibCtrl (bit 47 on Gen7, bit 11 on Gen8) +* - Dst.AddrImm[9] (bit 47 on Gen8) +* - Src0.AddrImm[9] (bit 95 on Gen8) +*/ + if (brw->gen >= 8) + return brw_inst_bits(src, 95, 95) || + brw_inst_bits(src, 47, 47) || + brw_inst_bits(src, 11, 11) || + brw_inst_bits(src, 7, 7); + else + return brw_inst_bits(src, 95, 91) || + brw_inst_bits(src, 47, 47) || + brw_inst_bits(src, 7, 7) || + (brw->gen < 7 && brw_inst_bits(src, 90, 90)); +} + +static bool +has_3src_unmapped_bits(struct brw_context *brw, brw_inst *src) +{ + /* Check for three-source instruction bits that don't map to any of the +* fields of the compacted instruction. All of them seem to be reserved +* bits currently. +*/ + assert(brw->gen >= 8); + if (brw->gen >= 9 || brw->is_cherryview) + return brw_inst_bits(src, 127, 127) || + brw_inst_bits(src, 105, 105) || + brw_inst_bits(src, 7, 7); + else + return brw_inst_bits(src, 127, 126) || + brw_inst_bits(src, 105, 105) || + brw_inst_bits(src, 84, 84) || + brw_inst_bits(src, 36, 35) || + brw_inst_bits(src, 7, 7); +} + +static bool brw_try_compact_3src_instruction(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src) { assert(brw->gen >= 8); + assert(!has_3src_unmapped_bits(brw, src)); #define compact(field) \ brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(brw, src)) @@ -937,6 +980,9 @@ brw_try_compact_instruction(struct brw_context *brw, brw_compact_inst *dst, return false; } + if (has_unmapped_bits(brw, src)) + return false; + memset(&temp, 0, sizeof(temp)); brw_compact_inst_set_opcode(&temp, brw_inst_opcode(brw, src)); -- 2.1.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCHv2 01/32] i965: Factor out virtual GRF allocation to a separate object.
Right now virtual GRF book-keeping and allocation is performed in each visitor class separately (among other hundred different things), leading to duplicated logic in each visitor and preventing layering as it forces any code that manipulates i965 IR and needs to allocate virtual registers to depend on the specific visitor that happens to be used to translate from GLSL IR. v2: Use realloc()/free() to allocate VGRF book-keeping arrays (Connor). Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_fs.cpp | 77 --- src/mesa/drivers/dri/i965/brw_fs.h | 8 +- src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 2 +- .../drivers/dri/i965/brw_fs_live_variables.cpp | 8 +- src/mesa/drivers/dri/i965/brw_fs_live_variables.h | 2 +- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 50 ++--- .../drivers/dri/i965/brw_fs_register_coalesce.cpp | 8 +- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 49 ++-- src/mesa/drivers/dri/i965/brw_ir_allocator.h | 87 ++ .../drivers/dri/i965/brw_schedule_instructions.cpp | 10 +-- src/mesa/drivers/dri/i965/brw_shader.h | 6 ++ src/mesa/drivers/dri/i965/brw_vec4.cpp | 20 ++--- src/mesa/drivers/dri/i965/brw_vec4.h | 12 --- .../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 8 +- src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 2 +- .../drivers/dri/i965/brw_vec4_live_variables.cpp | 10 +-- .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 43 ++- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 36 ++--- 19 files changed, 237 insertions(+), 203 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_ir_allocator.h diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 83c09e1..3acbb0b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -424,7 +424,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst, assert(dst.width % 8 == 0); int regs_written = 4 * (dst.width / 8) * scale; - fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(regs_written), + fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written), dst.type, dst.width); inst = new(mem_ctx) fs_inst(op, vec4_result, surf_index, vec4_offset); inst->regs_written = regs_written; @@ -688,7 +688,7 @@ fs_visitor::get_timestamp() 0), BRW_REGISTER_TYPE_UD)); - fs_reg dst = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD, 4); + fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 4); fs_inst *mov = emit(MOV(dst, ts)); /* We want to read the 3 fields we care about even if it's not enabled in @@ -764,7 +764,7 @@ fs_visitor::emit_shader_time_end() fs_reg start = shader_start_time; start.negate = true; - fs_reg diff = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD, 1); + fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1); emit(ADD(diff, start, shader_end_time)); /* If there were no instructions between the two timestamp gets, the diff @@ -1029,26 +1029,11 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) } } -int -fs_visitor::virtual_grf_alloc(int size) -{ - if (virtual_grf_array_size <= virtual_grf_count) { - if (virtual_grf_array_size == 0) -virtual_grf_array_size = 16; - else -virtual_grf_array_size *= 2; - virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, - virtual_grf_array_size); - } - virtual_grf_sizes[virtual_grf_count] = size; - return virtual_grf_count++; -} - fs_reg fs_visitor::vgrf(const glsl_type *const type) { int reg_width = dispatch_width / 8; - return fs_reg(GRF, virtual_grf_alloc(type_size(type) * reg_width), + return fs_reg(GRF, alloc.allocate(type_size(type) * reg_width), brw_type_for_base_type(type), dispatch_width); } @@ -1056,7 +1041,7 @@ fs_reg fs_visitor::vgrf(int num_components) { int reg_width = dispatch_width / 8; - return fs_reg(GRF, virtual_grf_alloc(num_components * reg_width), + return fs_reg(GRF, alloc.allocate(num_components * reg_width), BRW_REGISTER_TYPE_F, dispatch_width); } @@ -1912,14 +1897,14 @@ fs_visitor::assign_vs_urb_setup() void fs_visitor::split_virtual_grfs() { - int num_vars = this->virtual_grf_count; + int num_vars = this->alloc.count; /* Count the total number of registers */ int reg_count = 0; int vgrf_to_reg[num_vars]; for (int i = 0; i < num_vars; i++) { vgrf_to_reg[i] = reg_count; - reg_count += virtual_grf_sizes[i]; + reg_count += alloc.sizes[i]; } /* An array of "split points". For each register slot, this indicates @@ -1935,14 +
[Mesa-dev] [Bug 77449] Tracker bug for all bugs related to Steam titles
https://bugs.freedesktop.org/show_bug.cgi?id=77449 Bug 77449 depends on bug 84570, which changed state. Bug 84570 Summary: Borderlands 2/Pre-Sequel: Constant frame rate drops while playing; really bad with additionl lighting https://bugs.freedesktop.org/show_bug.cgi?id=84570 What|Removed |Added Status|NEW |RESOLVED Resolution|--- |FIXED -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 84570] Borderlands 2/Pre-Sequel: Constant frame rate drops while playing; really bad with additionl lighting
https://bugs.freedesktop.org/show_bug.cgi?id=84570 Kai changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #34 from Kai --- Then I'm closing this bug. Thanks to everybody for your various contributions! -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/10] i965/fs: Combine constants and unconditionally emit MADs
Hi, On 02/05/2015 06:21 AM, Matt Turner wrote: total instructions in shared programs: 5895414 -> 5747578 (-2.51%) instructions in affected programs: 3618111 -> 3470275 (-4.09%) helped:20492 HURT: 4449 GAINED:54 LOST: 146 and with NIR, that already emits MADs always: total instructions in shared programs: 7992936 -> 7772474 (-2.76%) instructions in affected programs: 3738730 -> 3518268 (-5.90%) helped:22082 HURT: 3445 GAINED:70 LOST: 78 There are some particularly exciting individual improvements too: kerbal-space-program/667: FS SIMD8: 1024 -> 561 (-45.21%) kerbal-space-program/676: FS SIMD8: 1021 -> 558 (-45.35%) From how old Kerbal version these shaders are from? I just noticed that recent Kerbal outputs large amount of register spill & alloc failure messages with INTEL_DEBUG=perf, whereas output I've saved from a last year Kerbal version, had much less of them. rocketbirds-hardboiled-chicken/fp-2: FS SIMD8: 1076 -> 582 (-45.91%) Changing the fs_visitor and fp backend to always emit MADs is a little like moving the goal posts for switching to NIR, but I think always emitting MADs this is the right direction, and of course we'll discover more improvements the more we work with the code. - Eero PS. output from games using Unity3D engine goes to: ~/.config/unity3d///Player.log Using Apitrace replay makes seeing the warnings easier. :-) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Expected wide line rendering with clipping
On Mon, Feb 9, 2015 at 12:35 PM, Roland Scheidegger wrote: > Am 09.02.2015 um 09:53 schrieb Iago Toral: >> On Fri, 2015-02-06 at 21:27 +0100, Roland Scheidegger wrote: >>> Am 06.02.2015 um 13:11 schrieb Iago Toral: Hi, Eduardo and I have been looking into a few dEQP test failures that deal with wide line rendering. There are a few of them that fail because of how clipping is implemented for this case. The problem in these cases seems to be that the hw renders the wide line as a parallelogram so that if an edge of the parallelogram is partially clipped, the other parallel edge will also be clipped at the same height/width so that the resulting wide line stays a parallelogram. The dEQP renders as if a wide line where a collection of 1px lines, so cliping one edge of the resulting line does not have implications for the other edge. This ASCII art illustrates the problem (* represent line pixels, | represents the viewport's rightmost edge): Expected by dEQP i965 rendering | | *| | **| | ***| | | | | | | | We can make the rendering result match dEQP's expectation by enabling the GuardBand in the clip stage (GEN6_CLIP_GB_TEST). This is being disabled by the driver for gen7 in this scenario because the underlying framebuffer surface is larger than the viewport, and per the comment in the code, in gen7 that would make it render beyond the viewport limits in that surface, while it notes that gen8 hw fixes this. So I guess that we do not want to do this in any case in gen7. Then, I have been reviewing the OpenGL specs to see if they clarify what should happen when clipping wide lines and I think the spec does not make a clear point, since when it talks about line clipping it does not cover the case of wide lines specifically: "13.5. PRIMITIVE CLIPPING ... If part of the line segment lies in the volume and part lies outside, then the line segment is clipped and new vertex coordinates are computed for one or both vertices. A clipped line segment endpoint lies on both the original line segment and the boundary of the clip volume. ... " The above description is clear for 1px lines, but where should the new vertex be generated exactly for wide lines, where many points could be at the boundary of the clip volume? Is any point valid? (that would mean the dEQP test is bogus because there are multiple accepted renderings), should the new vertex be exactly at the center of the line? (that would make both deqp and intel rendering bogus). Then there is also this comment in "14.5.2.2 Wide Lines" inside the non-AA line rasterization chapter: "Non-antialiased line segments of width other than one are rasterized by off-setting them in the minor direction (for an x-major line, the minor direction is y, and for a y-major line, the minor direction is x) and replicating fragments in the minor direction (see figure 14.3). Let w be the width rounded to the nearest integer (if w = 0, then it is as if w = 1). If the line segment has endpoints given by (x 0 , y 0 ) and (x 1 , y 1 ) in window coordinates, the segment with endpoints (x 0 , y 0 − (w − 1)/2) and (x 1 , y 1 − (w − 1)/2) is rasterized, but instead of a single fragment, a column of fragments of height w (a row of fragments of length w for a y-major segment) is produced at each x (y for y-major) location. The lowest fragment of this column is the fragment that would be produced by rasterizing the segment of width 1 with the modified coordinates." This "suggests" that wide line rendering should be equivalent to rendering multiple lines of width=1 and if we actually did that then the expected rendering result after clipping would be what dEQP expects. I think the whole thing isn't 100% clear. Does anyone have any thoughts on what should be considered correct behavior in this case? Iago >>> >>> I am quite sure that i965 rendering is what (at least legacy GL) >>> expects. This is because the wide-line expansion is defined to be >>> happening at rasterization time, i.e. past clipping. Thus, at clipping >>> time, no matter if a line is wide or not, it is still really a line, >>> hence clipping will produce a single new endpoint at the viewport edge >>> and in rasterization this is then what will be used as the endpoint. >> >> That makes sense, but in that case I think i965 rendering would still be >> incorrect, the expected result would look something like this: >> >>
[Mesa-dev] [PATCH] mesa: Rename the CEILING() macro to DIV_ROUND_UP().
Some people have complained that code using the CEILING() macro is difficult to understand because it's not immediately obvious what it is supposed to do until you go and look up its definition. Use a more descriptive name that matches the similar utility macro in the Linux kernel. --- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 +- src/mesa/main/image.c | 2 +- src/mesa/main/macros.h | 2 +- src/mesa/main/pack.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index fe05110..9d5e648 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -3628,7 +3628,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, */ this->uniform_array_size = 1; if (prog_data) { - this->uniform_array_size = MAX2(CEILING(stage_prog_data->nr_params, 4), + this->uniform_array_size = MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1); } diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c index c37900c..e97b006 100644 --- a/src/mesa/main/image.c +++ b/src/mesa/main/image.c @@ -151,7 +151,7 @@ _mesa_image_offset( GLuint dimensions, assert(format == GL_COLOR_INDEX || format == GL_STENCIL_INDEX); bytes_per_row = alignment -* CEILING( comp_per_pixel*pixels_per_row, 8*alignment ); +* DIV_ROUND_UP( comp_per_pixel*pixels_per_row, 8*alignment ); bytes_per_image = bytes_per_row * rows_per_image; diff --git a/src/mesa/main/macros.h b/src/mesa/main/macros.h index cd5f2d6..2d59c6f 100644 --- a/src/mesa/main/macros.h +++ b/src/mesa/main/macros.h @@ -808,7 +808,7 @@ DIFFERENT_SIGNS(GLfloat x, GLfloat y) /** Compute ceiling of integer quotient of A divided by B. */ -#define CEILING( A, B ) ( (A) % (B) == 0 ? (A)/(B) : (A)/(B)+1 ) +#define DIV_ROUND_UP( A, B ) ( (A) % (B) == 0 ? (A)/(B) : (A)/(B)+1 ) /** casts to silence warnings with some compilers */ diff --git a/src/mesa/main/pack.c b/src/mesa/main/pack.c index 4cc8468..2111a76 100644 --- a/src/mesa/main/pack.c +++ b/src/mesa/main/pack.c @@ -155,7 +155,7 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source, if (!source) return; - width_in_bytes = CEILING( width, 8 ); + width_in_bytes = DIV_ROUND_UP( width, 8 ); src = source; for (row = 0; row < height; row++) { GLubyte *dst = (GLubyte *) _mesa_image_address2d(packing, dest, -- 2.1.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965/fs: Fix fs_inst::regs_written calculation for instructions with scalar dst.
Scalar registers are required to have zero stride, fix the regs_written calculation not to assume that the instruction writes zero registers in that case. v2: Rename CEILING() to DIV_ROUND_UP(). (Matt, Ken) Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 0a82fb7..b75d377 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -126,7 +126,8 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, case HW_REG: case MRF: case ATTR: - this->regs_written = (dst.width * dst.stride * type_sz(dst.type) + 31) / 32; + this->regs_written = + DIV_ROUND_UP(MAX2(dst.width * dst.stride, 1) * type_sz(dst.type), 32); break; case BAD_FILE: this->regs_written = 0; -- 2.1.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] util/u_atomic: Add new macro p_atomic_add
On 06/02/15 22:39, Carl Worth wrote: On Fri, Feb 06 2015, Aaron Watry wrote: Ignore me if this is a stupid question, but should those both be sizeof(short)? I'd expect the first to be sizeof(char). Not a stupid question. That was a copy-and-paste (kill-and-yank ?) bug of mine. Thanks for your attention to detail. I've fixed this in my tree. -Carl Hi Carl, Just one more tweak to InterlockedExchangeAdd64 as per patch attached. (The 64bit intrisicis are only available on 64-bits, but the non-intrisinc version is available everywhee.) With that u_test_atomic builds and passes for me both on 32 and 64bits. Sorry for the delay. And thanks for your help in keeping MSVC support on par. Jose diff --git a/src/util/u_atomic.h b/src/util/u_atomic.h index 4eb0ec6..0c43410 100644 --- a/src/util/u_atomic.h +++ b/src/util/u_atomic.h @@ -147,10 +147,10 @@ char _InterlockedCompareExchange8(char volatile *Destination8, char Exchange8, c (assert(!"should not get here"), 0)) #define p_atomic_add(_v, _i) (\ - sizeof *(_v) == sizeof(short) ? _InterlockedExchangeAdd8 ((char *) (_v), (_i)) : \ + sizeof *(_v) == sizeof(char)? _InterlockedExchangeAdd8 ((char *) (_v), (_i)) : \ sizeof *(_v) == sizeof(short) ? _InterlockedExchangeAdd16((short *) (_v), (_i)) : \ sizeof *(_v) == sizeof(long)? _InterlockedExchangeAdd ((long *) (_v), (_i)) : \ - sizeof *(_v) == sizeof(__int64) ? _InterlockedExchangeAdd64((__int64 *)(_v), (_i)) : \ + sizeof *(_v) == sizeof(__int64) ? InterlockedExchangeAdd64 ((__int64 *)(_v), (_i)) : \ (assert(!"should not get here"), 0)) #define p_atomic_cmpxchg(_v, _old, _new) (\ ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 89043] undefined symbol: _glapi_tls_Dispatch
https://bugs.freedesktop.org/show_bug.cgi?id=89043 Bug ID: 89043 Summary: undefined symbol: _glapi_tls_Dispatch Product: Mesa Version: git Hardware: x86-64 (AMD64) OS: Linux (All) Status: NEW Severity: normal Priority: medium Component: Mesa core Assignee: mesa-dev@lists.freedesktop.org Reporter: adityaavina...@gmail.com QA Contact: mesa-dev@lists.freedesktop.org Hi, I am trying to build mesa from git. It was working fine for a week. Yesterday this error keeps poping up when I try to compile some tests. I re-installed mesa. It didn't work. I re-installed Ubuntu 14.04. It didn't work. Nothing is getting compiled, the binaries glxinfo, glxgears are also throwing the same error. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa: Bump MAX_IMAGE_UNIFORMS to 32.
So the i965 driver can expose 32 image uniforms per shader stage. --- src/mesa/main/config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h index 4ec4b75..08e1a14 100644 --- a/src/mesa/main/config.h +++ b/src/mesa/main/config.h @@ -178,7 +178,7 @@ #define MAX_COMBINED_ATOMIC_BUFFERS(MAX_UNIFORM_BUFFERS * 6) /* Size of an atomic counter in bytes according to ARB_shader_atomic_counters */ #define ATOMIC_COUNTER_SIZE4 -#define MAX_IMAGE_UNIFORMS 16 +#define MAX_IMAGE_UNIFORMS 32 /* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */ #define MAX_IMAGE_UNITS(MAX_IMAGE_UNIFORMS * 6) /*@}*/ -- 2.1.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 02/14] i965: Allocate binding table space for shader images.
Kenneth Graunke writes: > On Saturday, February 07, 2015 03:03:44 AM Francisco Jerez wrote: >> Kenneth Graunke writes: >> >> > On Friday, February 06, 2015 07:23:16 PM Francisco Jerez wrote: >> >> Reviewed-by: Paul Berry >> >> --- >> >> src/mesa/drivers/dri/i965/brw_context.h | 5 + >> >> src/mesa/drivers/dri/i965/brw_shader.cpp | 7 +++ >> >> 2 files changed, 12 insertions(+) >> >> >> >> diff --git a/src/mesa/drivers/dri/i965/brw_context.h >> >> b/src/mesa/drivers/dri/i965/brw_context.h >> >> index bebb0be..e28c65d 100644 >> >> --- a/src/mesa/drivers/dri/i965/brw_context.h >> >> +++ b/src/mesa/drivers/dri/i965/brw_context.h >> >> @@ -341,6 +341,7 @@ struct brw_stage_prog_data { >> >>uint32_t gather_texture_start; >> >>uint32_t ubo_start; >> >>uint32_t abo_start; >> >> + uint32_t image_start; >> >>uint32_t shader_time_start; >> >>/** @} */ >> >> } binding_table; >> >> @@ -621,6 +622,9 @@ struct brw_vs_prog_data { >> >> /** Max number of atomic counter buffer objects in a shader */ >> >> #define BRW_MAX_ABO 16 >> >> >> >> +/** Max number of image units in a shader */ >> >> +#define BRW_MAX_IMAGES 16 >> >> + >> > >> > Out of curiosity, why 16? The Windows driver and Nvidia both seem to >> > use 8, while AMD offers 32: >> > >> > http://delphigl.de/glcapsviewer/gl_stats_caps_single.php?listreportsbycap=GL_MAX_IMAGE_UNITS >> > >> > We have 32 texture units on Haswell these days (though honestly, it's >> > pretty arbitrary...we could have more if we wanted...) >> > >> > That's not an objection - I'm just curious. As is, >> > Reviewed-by: Kenneth Graunke >> > >> It's completely arbitrary too, IIRC I set it to 16 because back when I >> wrote this patch we were still exposing 16 texture units, so I just used >> the same value. We could definitely expose 32 image units too, should I >> just increase the value to 32 locally? > > That seems like a good idea to me, unless you can think of some reason > not to. I'll leave it up to you :) Cool, I've increased BRW_MAX_IMAGES to 32 locally. The core mesa patch I just sent will also be required so the GL context and shader objects have enough state entries to keep track of all these image units. :) pgpjQCpi4ch2a.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/util: Define ffsll on OpenBSD.
On 06/02/2015 19:58, Matt Turner wrote: On Fri, Feb 6, 2015 at 3:38 AM, Jonathan Gray wrote: OpenBSD has ffs in libc but does not have ffsll so use the compiler builtin. PIPE_OS_BSD isn't suitable here as FreeBSD has ffsll in libc. Signed-off-by: Jonathan Gray --- src/gallium/auxiliary/util/u_math.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 5db5b66..ec282f3 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -531,6 +531,8 @@ unsigned ffs( unsigned u ) #elif defined(__MINGW32__) || defined(PIPE_OS_ANDROID) #define ffs __builtin_ffs #define ffsll __builtin_ffsll +#elif defined(__OpenBSD__) +#define ffsll __builtin_ffsll #endif Autoconf checks for presence of a bunch of builtins. Please use those instead (in this case, HAVE___BUILTIN_FFSLL). Yes, please. This has just been 'fixed' for MinGW, now for OpenBSD, and also needs fixing for Cygwin. Attached is a patch which attempts to do this using autoconf checks. From 580eb16295a94012c488db7ac44d09cb3ca8ff55 Mon Sep 17 00:00:00 2001 From: Jon TURNEY Date: Sat, 7 Feb 2015 14:17:35 + Subject: [PATCH] gallium/util: Use autoconf check for ffs() and ffsll() u_math.h should probably explicitly include config.h, but it didn't before and already relies on some HAVE_ defines, so I haven't added it. This makes a subtle change on non-linux platforms: previously we were explicitly using __builtin_ffs(), whereas now we are using ffs(), and the compiler might decide to convert that to a builtin. If we wanted to ensure the builtin was used for some reason, this isn't quite right. Since ffs() is used, we need to include for the prototype, which I think we expect to find on all POSIX systems, but not on MSVC. Add an autoconf check for that header and use that to conditionalize it's use. Define _GNU_SOURCE on Cygwin so that the GNU extension ffsll() is prototyped. I think I've put the right defines in the right place for the Android build system, but I haven't build tested that. Signed-off-by: Jon TURNEY --- Android.common.mk | 3 +++ configure.ac| 6 +- src/gallium/auxiliary/util/u_math.h | 12 ++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/Android.common.mk b/Android.common.mk index 3e6d4c3..7719096 100644 --- a/Android.common.mk +++ b/Android.common.mk @@ -42,6 +42,9 @@ LOCAL_CFLAGS += \ LOCAL_CFLAGS += \ -DHAVE_PTHREAD=1 \ + -DHAVE_STRINGS_H \ + -DHAVE___BUILTIN_FFS \ + -DHAVE___BUILTIN_FFSLL \ -fvisibility=hidden \ -Wno-sign-compare diff --git a/configure.ac b/configure.ac index 7c2692e..cd4d7ed 100644 --- a/configure.ac +++ b/configure.ac @@ -172,6 +172,10 @@ if test "x$GCC" = xyes -a "x$acv_mesa_CLANG" = xno; then fi fi +dnl Check for ffs and ffsll +AC_CHECK_HEADER([strings.h]) +AC_CHECK_FUNCS([ffs ffsll]) + dnl Check for compiler builtins AX_GCC_BUILTIN([__builtin_bswap32]) AX_GCC_BUILTIN([__builtin_bswap64]) @@ -219,7 +223,7 @@ solaris*) DEFINES="$DEFINES -DSVR4" ;; cygwin*) -DEFINES="$DEFINES -D_XOPEN_SOURCE=700" +DEFINES="$DEFINES -D_GNU_SOURCE" ;; esac diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 5db5b66..45587fb 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -52,7 +52,7 @@ extern "C" { #include #include -#ifdef PIPE_OS_UNIX +#ifdef HAVE_STRINGS_H #include /* for ffs */ #endif @@ -528,10 +528,18 @@ unsigned ffs( unsigned u ) return i; } -#elif defined(__MINGW32__) || defined(PIPE_OS_ANDROID) +#else +#ifndef HAVE_FFS +#ifdef HAVE___BUILTIN_FFS #define ffs __builtin_ffs +#endif +#endif +#ifndef HAVE_FFSLL +#ifdef HAVE___BUILTIN_FFSLL #define ffsll __builtin_ffsll #endif +#endif +#endif #endif /* FFS_DEFINED */ -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Rename mesa/src/util (Was: gallium/util: add u_bit_scan64)
FWIW using a different name for such helpers in src/util looks good to me (and I'm pretty indifferent to the actual name). We already have imports from mesa/main crept in there which need fixing and we certainly don't want to make the same mistake with gallium includes. Roland Am 08.02.2015 um 21:08 schrieb Jose Fonseca: > As I said earlier on this thread, it's not that simple: u_math.c depends on > u_cpu_detect.c and more. I wish it was a matter of merely moving u_math.[ch] > to src/util, but it's not. > > And because I don't believe I have the time to untangle u_math.[ch] from > everything else, I'm restrictied to more incremental solutions. That is, add > a new header to src/util with an unique name, and cherry-pick bits and pieces > from u_math.[ch] as deemed necessary/practical. > > Jose > > > > From: Marek Olšák > Sent: 08 February 2015 11:27 > To: Jose Fonseca > Cc: Matt Turner; Emil Velikov; ML mesa-dev > Subject: Re: [Mesa-dev] Rename mesa/src/util (Was: gallium/util: add > u_bit_scan64) > > I kind of like the "util_" prefix everywhere. u_math only depends on > p_config.h and p_compiler.h. I don't think it would be hard to move > those two into src/util as well. We have always wanted Mesa to use > more of Gallium. This might be a good start. > > Just my 2 cents. > > Marek > > On Sat, Feb 7, 2015 at 3:46 PM, Jose Fonseca wrote: >> On 07/02/15 00:10, Matt Turner wrote: >>> >>> On Fri, Feb 6, 2015 at 3:58 PM, Emil Velikov >>> wrote: > > "util" is meant to be for shared utility across the entire code base - > both Mesa and Gallium. It's been growing slowly as people move things > there. It might make sense to move a lot of src/gallium/auxiliary/util > there, in fact - there's always been a lot of duplication between Mesa > and Gallium's utility code. But that's up to the Gallium developers. > Imho currently the util library is growing on the basis of "we can share X let's throw it in there" rather than putting much thought about the structure/"design" of it - unlike in gallium. >>> >>> >>> Are you serious? Let's be honest with ourselves. I probably would have >>> been a better plan to not put commonly useful code deep in Gallium in >>> the first place. >> >> >> Historic reasons, as Brian explained. Gallium was supposed to become a >> dependency of Mesa but it didn't panned out. >> >>> Apparently this is what I get for trying to do the right thing an pull >>> the atomics code out into a place the rest of the Mesa project can use >>> it. >> >> >> I really appreciate you went the extra mile there. And for me it's way more >> important that we start sharing code than the naming structure. >> >> Especially when naming is subject to test/style whereas code reuse is >> something everybody can readily agree on. >> >> If the outcome of this email thread would be to dicentivate you to share >> more code, then that would be worst outcome indeed. >> >> Anyway, let's get out of this criticism spiral, and instead focus on how we >> can solve the issues to everybody's satisfaction. >> >>> How about instead of an annoying bikeshed thread we just finish moving >>> bits of Gallium's util directory to src/util and be done with it? >> >> >> If renaming src/util is not something we can agree fine. Let's forget about >> it. >> >> >> But I don't think I (or anybody) has the time to move >> src/gallium/auxiliary/util to src/util in one go. The code is entangled >> with src/gallium/include . >> >> That is, moving the whole src/gaullium/auxiliary/util to src/util equals to >> add gallium as dependency to whole mesa. If that's OK, then I agree with >> Brian's suggestion: might as well do that (leave util in >> src/gallium/axuliary ) and add src/gallium/* as includes/dependency >> everwhere. >> >> I think for Mesa (src/mesa) this is fine. I'm not sure about src/glsl. >> >> Again, I suspect this won't be something we'll agree neither. >> >> >> >> So I'm back to the beginning: I want to move some math helpers from >> src/gallium/auxiliary/util/u_math to somewhere inside src/util. I need >> _some_ name: cgrt_*.h is no good, math.h would collide with standard C >> headers, u_math.h would collide with src/gallium/auxiliary/util, so it must >> be something else. I'm open to suggestions. If none I'll go with >> mathhelpers.h >> >> >> >> Jose >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=AwIBaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=zfmBZnnVGHeYde45pMKNnVyzeaZbdIqVLprmZCM2zzE&m=sAgl8jEh0WnJsnslLG59t6U2AbWzLAmPO3N7rrA9YPE&s=L3d9gFZyZkC0vZ0eYA6Bl3TFEihZ0Btt56_Hl3iTvS0&e= > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.freedeskt
[Mesa-dev] [PATCH] i965: Don't tile 1D miptrees.
It doesn't really improve locality of texture fetches, quite the opposite it's a waste of memory bandwidth and space due to tile alignment. v2: Check mt->logical_height0 instead of mt->target (Ken). Add short comment explaining why they shouldn't be tiled. --- src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 64752dd..0e3888f 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -488,6 +488,13 @@ intel_miptree_choose_tiling(struct brw_context *brw, base_format == GL_DEPTH_STENCIL_EXT) return I915_TILING_Y; + /* 1D textures (and 1D array textures) don't get any benefit from tiling, +* in fact it leads to a less efficient use of memory space and bandwidth +* due to tile alignment. +*/ + if (mt->logical_height0 == 1) + return I915_TILING_NONE; + int minimum_pitch = mt->total_width * mt->cpp; /* If the width is much smaller than a tile, don't bother tiling. */ -- 2.1.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 14/14] i965/gen7-8: Implement glMemoryBarrier().
Kristian Høgsberg writes: > On Fri, Feb 6, 2015 at 9:23 AM, Francisco Jerez wrote: >> --- >> src/mesa/drivers/dri/i965/brw_program.c | 40 >> + >> src/mesa/drivers/dri/i965/intel_reg.h | 1 + >> 2 files changed, 41 insertions(+) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_program.c >> b/src/mesa/drivers/dri/i965/brw_program.c >> index d9a3f05..793d963 100644 >> --- a/src/mesa/drivers/dri/i965/brw_program.c >> +++ b/src/mesa/drivers/dri/i965/brw_program.c >> @@ -44,6 +44,7 @@ >> #include "brw_context.h" >> #include "brw_shader.h" >> #include "brw_wm.h" >> +#include "intel_batchbuffer.h" >> >> static unsigned >> get_new_program_id(struct intel_screen *screen) >> @@ -179,6 +180,43 @@ brwProgramStringNotify(struct gl_context *ctx, >> return true; >> } >> >> +static void >> +brwMemoryBarrier(struct gl_context *ctx, GLbitfield barriers) >> +{ >> + struct brw_context *brw = brw_context(ctx); >> + unsigned bits = (PIPE_CONTROL_DATA_CACHE_INVALIDATE | >> +PIPE_CONTROL_NO_WRITE | >> +PIPE_CONTROL_CS_STALL); >> + assert(brw->gen >= 7 && brw->gen <= 8); >> + >> + if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | >> + GL_ELEMENT_ARRAY_BARRIER_BIT | >> + GL_COMMAND_BARRIER_BIT)) >> + bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE; >> + >> + if (barriers & GL_UNIFORM_BARRIER_BIT) >> + bits |= (PIPE_CONTROL_TC_FLUSH | >> + PIPE_CONTROL_CONST_CACHE_INVALIDATE); >> + >> + if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT) >> + bits |= PIPE_CONTROL_TC_FLUSH; >> + >> + if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT) >> + bits |= PIPE_CONTROL_WRITE_FLUSH; >> + >> + if (barriers & GL_FRAMEBUFFER_BARRIER_BIT) >> + bits |= (PIPE_CONTROL_DEPTH_CACHE_FLUSH | >> + PIPE_CONTROL_WRITE_FLUSH); >> + >> + /* Typed surface messages are handled by the render cache on IVB, so we >> +* need to flush it too. >> +*/ >> + if (brw->gen == 7 && !brw->is_haswell) >> + bits |= PIPE_CONTROL_WRITE_FLUSH; >> + >> + brw_emit_pipe_control_flush(brw, bits); >> +} >> + >> void >> brw_add_texrect_params(struct gl_program *prog) >> { >> @@ -236,6 +274,8 @@ void brwInitFragProgFuncs( struct dd_function_table >> *functions ) >> >> functions->NewShader = brw_new_shader; >> functions->LinkShader = brw_link_shader; >> + >> + functions->MemoryBarrier = brwMemoryBarrier; > > Just a drive-by nitpick: brw_memory_barrier? > I'm OK with that, I've changed it locally. I used camel case because it seemed like all the other dd hooks implemented in the same file used it. Apparently all other functions defined in it don't, so I guess it's going to be inconsistent anyway. > Kristian > >> } >> >> void >> diff --git a/src/mesa/drivers/dri/i965/intel_reg.h >> b/src/mesa/drivers/dri/i965/intel_reg.h >> index 5ac0180..8b630c5 100644 >> --- a/src/mesa/drivers/dri/i965/intel_reg.h >> +++ b/src/mesa/drivers/dri/i965/intel_reg.h >> @@ -70,6 +70,7 @@ >> #define PIPE_CONTROL_ISP_DIS (1 << 9) >> #define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8) >> /* GT */ >> +#define PIPE_CONTROL_DATA_CACHE_INVALIDATE (1 << 5) >> #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4) >> #define PIPE_CONTROL_CONST_CACHE_INVALIDATE(1 << 3) >> #define PIPE_CONTROL_STATE_CACHE_INVALIDATE(1 << 2) >> -- >> 2.1.3 >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev pgpQcke_WQmt0.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] nesa-10.4.4: gallivm/lp_bld_misc.cpp:503:38: error: no viable conversion from 'ShaderMemoryManager *' to 'std::unique_ptr'
On 07/02/15 21:44, Sedat Dilek wrote: > Hi, > > I was building mesa v10.4.4 with my llvm-toolchain v3.6.0rc2. > > My build breaks like this... > > ... > > Please cherry-pick... > > commit ef7e0b39a24966526b102643523feac765771842 > "gallivm: Update for RTDyldMemoryManager becoming an unique_ptr." > > ..for mesa 10.4 Git branch. > Hi Sedat, Picking a fix in a stable branch against a non-final release sounds like a no-go in our books. As the official llvm 3.6 rolls out we'll pick this fix for the stable branches - until then I would recommend (a) applying it locally or (b) using mesa from the 10.5 or master branch. Thanks Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] mesa-10.4.4: BROKEN TLS support in GLX with llvm-toolchain v3.6.0rc2
Hi Sedat, On 07/02/15 22:42, Sedat Dilek wrote: > [ Please CC me I am not subscribed to mesa-dev and llvmdev MLs ] > > Hi, > > I already reported this when playing 1st time with my llvm-toolchain > v3.6.0rc2 and mesa v10.3.7 [1]. > The issue still remains in mesa v10.4.4. > > So, this is a field test to see if LLVM/Clang v3.6.0rc2 fits my needs. > > I see the following build-error... > ... > > make[4]: Entering directory `/home/wearefam/src/mesa/mesa-git/src/mapi' > CC shared_glapi_libglapi_la-entry.lo > clang version 3.6.0 (tags/RELEASE_360/rc2) > Target: x86_64-unknown-linux-gnu > Thread model: posix > Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.6 > Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.6.4 > Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9 > Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9.2 > Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9 > Candidate multilib: .;@m64 > Candidate multilib: 32;@m32 > Selected multilib: .;@m64 > "/opt/llvm-toolchain-3.6.0rc2/bin/clang" -cc1 -triple > x86_64-unknown-linux-gnu -emit-obj -mrelax-all -disable-free > -main-file-name entry.c -mrelocation-model static -mthread-model posix > -mdisable-fp-elim -relaxed-aliasing -fmath-errno -masm-verbose > -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu > x86-64 -target-linker-version 2.22 -v -g -dwarf-column-info > -coverage-file /home/wearefam/src/mesa/mesa-git/src/mapi/entry.c > -resource-dir /opt/llvm-toolchain-3.6.0rc2/bin/../lib/clang/3.6.0 > -dependency-file .deps/shared_glapi_libglapi_la-entry.Tpo > -sys-header-deps -MP -MT shared_glapi_libglapi_la-entry.lo -D > "PACKAGE_NAME=\"Mesa\"" -D "PACKAGE_TARNAME=\"mesa\"" -D > "PACKAGE_VERSION=\"10.4.4\"" -D "PACKAGE_STRING=\"Mesa 10.4.4\"" -D > "PACKAGE_BUGREPORT=\"https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa\""; > -D "PACKAGE_URL=\"\"" -D "PACKAGE=\"mesa\"" -D "VERSION=\"10.4.4\"" -D > STDC_HEADERS=1 -D HAVE_SYS_TYPES_H=1 -D HAVE_SYS_STAT_H=1 -D > HAVE_STDLIB_H=1 -D HAVE_STRING_H=1 -D HAVE_MEMORY_H=1 -D > HAVE_STRINGS_H=1 -D HAVE_INTTYPES_H=1 -D HAVE_STDINT_H=1 -D > HAVE_UNISTD_H=1 -D HAVE_DLFCN_H=1 -D "LT_OBJDIR=\".libs/\"" -D > YYTEXT_POINTER=1 -D HAVE___BUILTIN_BSWAP32=1 -D > HAVE___BUILTIN_BSWAP64=1 -D HAVE___BUILTIN_CLZ=1 -D > HAVE___BUILTIN_CLZLL=1 -D HAVE___BUILTIN_CTZ=1 -D > HAVE___BUILTIN_EXPECT=1 -D HAVE___BUILTIN_FFS=1 -D > HAVE___BUILTIN_FFSLL=1 -D HAVE___BUILTIN_POPCOUNT=1 -D > HAVE___BUILTIN_POPCOUNTLL=1 -D HAVE___BUILTIN_UNREACHABLE=1 -D > HAVE_DLADDR=1 -D HAVE_PTHREAD=1 -D HAVE_LIBEXPAT=1 -D > USE_EXTERNAL_DXTN_LIB=1 -D _GNU_SOURCE -D USE_SSE41 -D DEBUG -D > USE_X86_64_ASM -D HAVE_XLOCALE_H -D HAVE_STRTOF -D HAVE_DLOPEN -D > HAVE_POSIX_MEMALIGN -D HAVE_LIBDRM -D GLX_USE_DRM -D HAVE_LIBUDEV -D > GLX_INDIRECT_RENDERING -D GLX_DIRECT_RENDERING -D GLX_USE_TLS -D > HAVE_ALIAS -D HAVE_MINCORE -D HAVE_LLVM=0x0306 -D LLVM_VERSION_PATCH=0 > -D MAPI_MODE_GLAPI -D > "MAPI_ABI_HEADER=\"shared-glapi/glapi_mapi_tmp.h\"" -I . -I > ../../include -I ../../src/mapi -I ../../src/mapi -I /opt/xorg/include > -internal-isystem /usr/local/include -internal-isystem > /opt/llvm-toolchain-3.6.0rc2/bin/../lib/clang/3.6.0/include > -internal-externc-isystem /usr/include/x86_64-linux-gnu > -internal-externc-isystem /include -internal-externc-isystem > /usr/include -O0 -Wall -Werror=implicit-function-declaration > -Werror=missing-prototypes -std=c99 -fdebug-compilation-dir > /home/wearefam/src/mesa/mesa-git/src/mapi -ferror-limit 19 > -fmessage-length 0 -pthread -mstackrealign -fobjc-runtime=gcc > -fdiagnostics-show-option -o entry.o -x c ../../src/mapi/entry.c > clang -cc1 version 3.6.0 based upon LLVM 3.6.0 default target > x86_64-unknown-linux-gnu > ignoring nonexistent directory "/include" > ignoring duplicate directory "." > ignoring duplicate directory "." > #include "..." search starts here: > #include <...> search starts here: > . > ../../include > /opt/xorg/include > /usr/local/include > /opt/llvm-toolchain-3.6.0rc2/bin/../lib/clang/3.6.0/include > /usr/include/x86_64-linux-gnu > /usr/include > End of search list. > In file included from ../../src/mapi/entry.c:49: > ./entry_x86-64_tls.h:66:1: warning: tentative array definition assumed > to have one element > x86_64_entry_start[]; > ^ > fatal error: error in backend: symbol 'x86_64_entry_start' is already defined > clang: error: clang frontend command failed with exit code 70 (use -v > to see invocation) > clang version 3.6.0 (tags/RELEASE_360/rc2) > Target: x86_64-unknown-linux-gnu > Thread model: posix > clang: note: diagnostic msg: PLEASE submit a bug report to > http://llvm.org/bugs/ and include the crash backtrace, preprocessed > source, and associated run script. > clang: note: diagnostic msg: > > > PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT: > Preprocessed source(s) and associated run script(s) are located at: > clang: note: diagnostic msg: /tmp/
Re: [Mesa-dev] [PATCH 11/14] i965/gen7: Enable fragment shader dispatch if the program has image uniforms.
Kenneth Graunke writes: > On Friday, February 06, 2015 07:23:25 PM Francisco Jerez wrote: >> Shaders with image uniforms may have side effects. Make sure that >> fragment shader threads are dispatched if the shader has any image >> uniforms. >> --- >> src/mesa/drivers/dri/i965/gen7_wm_state.c | 6 +- >> 1 file changed, 5 insertions(+), 1 deletion(-) >> >> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c >> b/src/mesa/drivers/dri/i965/gen7_wm_state.c >> index 923414e..034ce08 100644 >> --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c >> +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c >> @@ -39,6 +39,9 @@ upload_wm_state(struct brw_context *brw) >> /* BRW_NEW_FRAGMENT_PROGRAM */ >> const struct brw_fragment_program *fp = >>brw_fragment_program_const(brw->fragment_program); >> + struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram; >> + struct gl_shader *shader = >> + prog ? prog->_LinkedShaders[MESA_SHADER_FRAGMENT] : NULL; >> /* BRW_NEW_FS_PROG_DATA */ >> const struct brw_wm_prog_data *prog_data = brw->wm.prog_data; >> bool writes_depth = prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF; >> @@ -76,8 +79,9 @@ upload_wm_state(struct brw_context *brw) >>dw1 |= GEN7_WM_KILL_ENABLE; >> } >> >> - /* _NEW_BUFFERS | _NEW_COLOR */ >> + /* _NEW_BUFFERS | _NEW_COLOR, BRW_NEW_FRAGMENT_PROGRAM */ >> if (brw_color_buffer_write_enabled(brw) || writes_depth || >> + (shader && shader->NumImages) || >> dw1 & GEN7_WM_KILL_ENABLE) { >>dw1 |= GEN7_WM_DISPATCH_ENABLE; >> } >> > > Do you need to make a similar change in gen8_ps_state.c? I suppose it > could automatically get enabled when UAVs are on, but I haven't checked. > Nope, it's not necessary. There is no dispatch enable bit on Gen8, instead it's automatically calculated based on a number of things including the UAV access bits I'm setting in PATCH 12. > I've been trying to migrate the state upload code to exclusively rely on > prog_data, and not poke at the core Mesa shader structures directly > (and only listen to BRW_NEW_FS_PROG_DATA, not BRW_NEW_FRAGMENT_PROGRAM). > Okay, I'll fix that. > What do you think about adding a "uses_images" or "has_uav" flag to > prog_data and using it here? > I think we could just check prog_data->nr_image_params which is already there. > While I think I'd prefer that, this should also work, so it gets: > Reviewed-by: Kenneth Graunke Thanks! pgpBgeIxeD0GY.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Don't tile 1D miptrees.
Looks good to me. Reviewed-by: Neil Roberts - Neil Francisco Jerez writes: > It doesn't really improve locality of texture fetches, quite the > opposite it's a waste of memory bandwidth and space due to tile > alignment. > > v2: Check mt->logical_height0 instead of mt->target (Ken). Add short > comment explaining why they shouldn't be tiled. > --- > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 7 +++ > 1 file changed, 7 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > index 64752dd..0e3888f 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > @@ -488,6 +488,13 @@ intel_miptree_choose_tiling(struct brw_context *brw, > base_format == GL_DEPTH_STENCIL_EXT) >return I915_TILING_Y; > > + /* 1D textures (and 1D array textures) don't get any benefit from tiling, > +* in fact it leads to a less efficient use of memory space and bandwidth > +* due to tile alignment. > +*/ > + if (mt->logical_height0 == 1) > + return I915_TILING_NONE; > + > int minimum_pitch = mt->total_width * mt->cpp; > > /* If the width is much smaller than a tile, don't bother tiling. */ > -- > 2.1.3 pgpW186qYlh05.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965/skl: Implement WaDisable1DDepthStencil
Skylake+ doesn't support setting a depth buffer to a 1D surface but it does allow pretending it's a 2D texture with a height of 1 instead. This fixes the GL_DEPTH_COMPONENT_* tests of the copyteximage piglit test (and also seems to avoid a subsequent GPU hang). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89037 --- This probably only makes sense on top of Francisco Jerez's patch here: http://lists.freedesktop.org/archives/mesa-dev/2015-February/076392.html The copyteximage test is still failing with some other formats such as GL_RGB16 and some intensity and luminance formats but I'm just looking into that now. src/mesa/drivers/dri/i965/gen8_depth_state.c | 12 1 file changed, 12 insertions(+) diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c index e428089..b4eb6e1 100644 --- a/src/mesa/drivers/dri/i965/gen8_depth_state.c +++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c @@ -190,6 +190,18 @@ gen8_emit_depth_stencil_hiz(struct brw_context *brw, case GL_TEXTURE_3D: assert(mt); depth = MAX2(mt->logical_depth0, 1); + surftype = translate_tex_target(gl_target); + break; + case GL_TEXTURE_1D_ARRAY: + case GL_TEXTURE_1D: + if (brw->gen >= 9) { + /* WaDisable1DDepthStencil. Skylake+ doesn't support 1D depth + * textures but it does allow pretending it's a 2D texture + * instead. + */ + surftype = BRW_SURFACE_2D; + break; + } /* fallthrough */ default: surftype = translate_tex_target(gl_target); -- 1.9.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] util/u_atomic: Add new macro p_atomic_add
On Mon, Feb 09 2015, Jose Fonseca wrote: > Just one more tweak to InterlockedExchangeAdd64 as per patch attached. .. > With that u_test_atomic builds and passes for me both on 32 and > 64bits. Excellent. Thanks for the fix and for the testing report. > Sorry for the delay. And thanks for your help in keeping MSVC support on > par. No problem with the delay. The timing is just right since I'm about to send to the list an updated cache implementation that relies on this new atomic_add, and that also includes some unit tests that I'm hoping people can help test. (But, fair warning, there's a bit of file-system manipulation code there that I think will probably need some attention to work on non-Linux systems.) -Carl pgppFCoguqri0.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: Rename the CEILING() macro to DIV_ROUND_UP().
Reviewed-by: Matt Turner ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965/fs: Fix fs_inst::regs_written calculation for instructions with scalar dst.
Reviewed-by: Matt Turner ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] glsl: Making logical operations per-component?
On 02/08/2015 04:10 PM, Matt Turner wrote: > The GLSL IR ir_binop_logic_* operations match the source language > operators in that they only operate on scalars. > > In talking to Ilia, I realized that the vectorizer pass doesn't know > about that, and so it will happily vectorize the vertex shader in the > piglit test below. > > The i965 driver emits perfectly fine code for this and passes the > test, but ir_validate asserts that the operands to && are scalar > booleans. > > Should we relax this restriction and let the logical and/or/xor/not > operations operate per-component? Perhaps a small GLSL extension > codifying this and allowing it in the source language should be in > order? That sounds reasonable enough. It would just be changing a restriction specific to the IR, and it should affect the behavior observed by applications. I agree with Connor. Adding and, or, and xor functions similar to the existing not and comparitor functions would be the right way to expose this. > Sounds like something like this would simplify some code Ilia's working on. > > > [require] > GLSL >= 1.30 > > [vertex shader] > in vec4 vertex; > out vec4 color; > > uniform bvec4 a, b; > > void main() { > gl_Position = vertex; > > color.x = float(a.x && b.x); > color.y = float(a.y && b.y); > color.z = float(a.z && b.z); > color.w = float(a.w && b.w); > } > > [fragment shader] > in vec4 color; > out vec4 frag_color; > > void main() > { > frag_color = color; > } > > [vertex data] > vertex/float/2 > -1.0 -1.0 > 1.0 -1.0 > 1.0 1.0 > -1.0 1.0 > > [test] > uniform ivec4 a 1 1 1 1 > uniform ivec4 b 1 0 0 1 > draw arrays GL_TRIANGLE_FAN 0 4 > probe all rgba 1.0 0.0 0.0 1.0 > ` ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 09/28] glsl/ir: Add builtin function support for doubles
On 02/07/2015 05:25 PM, Matt Turner wrote: > On Sat, Feb 7, 2015 at 5:16 PM, Ilia Mirkin wrote: >> On Sat, Feb 7, 2015 at 8:10 PM, Ilia Mirkin wrote: > diff --git a/src/glsl/ir.h b/src/glsl/ir.h > index a0f48b2..6e7c654 100644 > --- a/src/glsl/ir.h > +++ b/src/glsl/ir.h > @@ -1275,6 +1275,13 @@ enum ir_expression_operation { > ir_unop_bitcast_f2u, /**< Bit-identical float-to-uint "conversion" */ > ir_unop_any, > > + ir_unop_d2f, /**< Double-to-float conversion. */ > + ir_unop_f2d, /**< Float-to-double conversion. */ > + ir_unop_d2i, /**< Double-to-integer conversion. */ > + ir_unop_i2d, /**< Integer-to-double conversion. */ > + ir_unop_d2u, /**< Double-to-unsigned conversion. */ > + ir_unop_u2d, /**< Unsigned-to-double conversion. */ > + These should be properly ordered with the other cast operations. Also, ir_unop_d2b is missing. >>> >>> Blast! You're right. I thought we were OK with d2f + f2b but... I >>> guess not. I'll add it in :( >> >> BTW, I don't see a logic to the current cast operations' order, so I'm >> going to leave these as-is (but add d2b in... b2d is actually probably >> unnecessary). Right... f2d(b2f(x)) should be fine. > I'd at least put them above the bitcast operations and any with the > other similar things like f2u. Yeah... I wasn't completely clear, but that's what I was thinking too. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCHv2 11/14] i965/gen7: Enable fragment shader dispatch if the program has image uniforms.
Shaders with image uniforms may have side effects. Make sure that fragment shader threads are dispatched if the shader has any image uniforms. v2: Use brw_stage_state::nr_image_params to find out if the shader has image uniforms instead of checking core mesa data structures (Ken). --- src/mesa/drivers/dri/i965/gen7_wm_state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 923414e..e426b30 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -78,6 +78,7 @@ upload_wm_state(struct brw_context *brw) /* _NEW_BUFFERS | _NEW_COLOR */ if (brw_color_buffer_write_enabled(brw) || writes_depth || + brw->wm.base.nr_image_params || dw1 & GEN7_WM_KILL_ENABLE) { dw1 |= GEN7_WM_DISPATCH_ENABLE; } -- 2.1.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 12/28] glsl/ir: Add builtin constant function support for doubles
On 02/07/2015 06:25 PM, Ilia Mirkin wrote: > On Fri, Feb 6, 2015 at 3:02 AM, Ian Romanick wrote: >>> @@ -708,6 +763,9 @@ ir_expression::constant_expression_value(struct >>> hash_table *variable_context) >>>case GLSL_TYPE_FLOAT: >>> data.f[c] = op[0]->value.f[c] - floor(op[0]->value.f[c]); >>> break; >>> + case GLSL_TYPE_DOUBLE: >>> + data.d[c] = op[0]->value.d[c] - floor(op[0]->value.d[c]); >>> + break; >>>default: >>> assert(0); >> >> Maybe follow-up by replacing a bunch of these with unreachable(). > > That seems highly unrelated to this series. Good newbie project though > -- audit all the assert(const)'s and convert into unreachable as > necessary. [I think there's basically no case where assert(const) is > right, in light of unreachable existing...] It's related in that near by code is being touched. That makes it fresh in the minds of reviewers. It's not a big deal either way. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCHv2 12/14] i965/gen7-8: Poke the 3DSTATE UAV access enable bits.
v2: Set the PS UAV-only bit on HSW (Ken). --- src/mesa/drivers/dri/i965/brw_defines.h | 4 src/mesa/drivers/dri/i965/gen7_gs_state.c | 4 +++- src/mesa/drivers/dri/i965/gen7_vs_state.c | 13 - src/mesa/drivers/dri/i965/gen7_wm_state.c | 9 + src/mesa/drivers/dri/i965/gen8_gs_state.c | 4 +++- src/mesa/drivers/dri/i965/gen8_ps_state.c | 3 +++ src/mesa/drivers/dri/i965/gen8_vs_state.c | 4 +++- 7 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index fe255cc..d4648ee 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1691,6 +1691,7 @@ enum brw_message_target { # define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 # define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) # define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16) +# define HSW_VS_UAV_ACCESS_ENABLE (1 << 12) /* DW4 */ # define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 # define GEN6_VS_URB_READ_LENGTH_SHIFT 11 @@ -1716,6 +1717,7 @@ enum brw_message_target { # define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 # define GEN6_GS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) # define GEN6_GS_FLOATING_POINT_MODE_ALT (1 << 16) +# define HSW_GS_UAV_ACCESS_ENABLE (1 << 12) /* DW4 */ # define GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT 23 # define GEN7_GS_OUTPUT_TOPOLOGY_SHIFT 17 @@ -2242,6 +2244,7 @@ enum brw_wm_barycentric_interp_mode { /* DW2 */ # define GEN7_WM_MSDISPMODE_PERSAMPLE (0 << 31) # define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31) +# define HSW_WM_UAV_ONLY(1 << 30) #define _3DSTATE_PS0x7820 /* GEN7+ */ /* DW1: kernel pointer */ @@ -2264,6 +2267,7 @@ enum brw_wm_barycentric_interp_mode { # define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8) # define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7) # define GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE (1 << 6) +# define HSW_PS_UAV_ACCESS_ENABLE (1 << 5) # define GEN7_PS_POSOFFSET_NONE(0 << 3) # define GEN7_PS_POSOFFSET_CENTROID(2 << 3) # define GEN7_PS_POSOFFSET_SAMPLE (3 << 3) diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index e1c4f8b..39e5201 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -59,7 +59,9 @@ upload_gs_state(struct brw_context *brw) OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_GS_SAMPLER_COUNT_SHIFT) | ((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) << - GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) | +(brw->is_haswell && stage_state->nr_image_params ? + HSW_GS_UAV_ACCESS_ENABLE : 0)); if (brw->gs.prog_data->base.base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 0e9b4fe..4741b78 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -77,6 +77,7 @@ upload_vs_state(struct brw_context *brw) uint32_t floating_point_mode = 0; const int max_threads_shift = brw->is_haswell ? HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT; + const struct brw_vue_prog_data *prog_data = &brw->vs.prog_data->base; if (!brw->is_haswell && !brw->is_baytrail) gen7_emit_vs_workaround_flush(brw); @@ -91,19 +92,21 @@ upload_vs_state(struct brw_context *brw) ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT) | ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) << - GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) | + (brw->is_haswell && stage_state->nr_image_params ? + HSW_VS_UAV_ACCESS_ENABLE : 0)); - if (brw->vs.prog_data->base.base.total_scratch) { + if (prog_data->base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(brw->vs.prog_data->base.base.total_scratch) - 11); + ffs(prog_data->base.total_scratch) - 11); } else { OUT_BATCH(0); } - OUT_BATCH((brw->vs.prog_data->base.base.dispatch_grf_start_reg << + OUT_BATCH((prog_data->base.dispatch_grf_start_reg << GEN6_VS_DISPATCH_START_GRF_SHIFT) | -(brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | +
Re: [Mesa-dev] [PATCHv2 32/32] i965: Don't compact instructions with unmapped bits.
On Mon, Feb 9, 2015 at 6:08 AM, Francisco Jerez wrote: > Some instruction bits don't have a mapping defined to any compacted > instruction field. If they're ever set and we end up compacting the > instruction they will be forced to zero. Avoid using compaction in such > cases. > > v2: Align multiple lines of an expression to the same column. Change > conditional compaction of 3-source instructions to an > assertion. (Matt) > --- > src/mesa/drivers/dri/i965/brw_eu_compact.c | 46 > ++ > 1 file changed, 46 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c > b/src/mesa/drivers/dri/i965/brw_eu_compact.c > index 8e33bcb..f80bcc1 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c > +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c > @@ -843,10 +843,53 @@ set_3src_source_index(struct brw_context *brw, > brw_compact_inst *dst, brw_inst * > } > > static bool > +has_unmapped_bits(struct brw_context *brw, brw_inst *src) > +{ > + /* Check for instruction bits that don't map to any of the fields of the > +* compacted instruction. The instruction cannot be compacted if any of > +* them are set. They overlap with: > +* - NibCtrl (bit 47 on Gen7, bit 11 on Gen8) > +* - Dst.AddrImm[9] (bit 47 on Gen8) > +* - Src0.AddrImm[9] (bit 95 on Gen8) > +*/ > + if (brw->gen >= 8) > + return brw_inst_bits(src, 95, 95) || > + brw_inst_bits(src, 47, 47) || > + brw_inst_bits(src, 11, 11) || > + brw_inst_bits(src, 7, 7); 11 (NibCtrl) is the only one that isn't reserved. I'd rather assert that reserved bits are not set. > + else > + return brw_inst_bits(src, 95, 91) || > + brw_inst_bits(src, 47, 47) || > + brw_inst_bits(src, 7, 7) || > + (brw->gen < 7 && brw_inst_bits(src, 90, 90)); Same thing, 47 (NibCtrl) is the only non-reserved bit. I'd rather assert about the others. > +} > + > +static bool > +has_3src_unmapped_bits(struct brw_context *brw, brw_inst *src) > +{ > + /* Check for three-source instruction bits that don't map to any of the > +* fields of the compacted instruction. All of them seem to be reserved > +* bits currently. > +*/ > + assert(brw->gen >= 8); > + if (brw->gen >= 9 || brw->is_cherryview) > + return brw_inst_bits(src, 127, 127) || > + brw_inst_bits(src, 105, 105) || 105 is Src1's SubRegNum[word] on CHV and is included in SourceIndex. > + brw_inst_bits(src, 7, 7); > + else > + return brw_inst_bits(src, 127, 126) || > + brw_inst_bits(src, 105, 105) || > + brw_inst_bits(src, 84, 84) || > + brw_inst_bits(src, 36, 35) || > + brw_inst_bits(src, 7, 7); Since bit 7 is reserved in all cases, we might just assert(brw_inst_bits(src, 7, 7) == 0) in brw_try_compact_instruction(). I don't have a preference. So, I think it'd be nice to differentiate between bits that aren't included in compact instructions and reserved bits. (Sorry, I could have given you this feedback in the first time around) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCHv2 13/14] i965/gen7-8: Set up early depth/stencil control appropriately for image load/store.
v2: Store early fragment test mode in brw_wm_prog_data instead of getting it from core mesa data structures (Ken). --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_defines.h | 3 +++ src/mesa/drivers/dri/i965/brw_wm.c | 2 ++ src/mesa/drivers/dri/i965/gen7_wm_state.c| 6 ++ src/mesa/drivers/dri/i965/gen8_depth_state.c | 6 +++--- src/mesa/drivers/dri/i965/gen8_ps_state.c| 6 ++ 6 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index e9ef678..1dfd3e6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -445,6 +445,7 @@ struct brw_wm_prog_data { uint8_t computed_depth_mode; + bool early_fragment_tests; bool no_8; bool dual_src_blend; bool uses_pos_offset; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index d4648ee..f36f677 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -2220,6 +2220,9 @@ enum brw_wm_barycentric_interp_mode { # define GEN7_WM_KILL_ENABLE (1 << 25) # define GEN7_WM_COMPUTED_DEPTH_MODE_SHIFT 23 # define GEN7_WM_USES_SOURCE_DEPTH (1 << 20) +# define GEN7_WM_EARLY_DS_CONTROL_NORMAL(0 << 21) +# define GEN7_WM_EARLY_DS_CONTROL_PSEXEC(1 << 21) +# define GEN7_WM_EARLY_DS_CONTROL_PREPS (2 << 21) # define GEN7_WM_USES_SOURCE_W (1 << 19) # define GEN7_WM_POSITION_ZW_PIXEL (0 << 17) # define GEN7_WM_POSITION_ZW_CENTROID (2 << 17) diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index d2b032f..0513190 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -182,6 +182,8 @@ bool do_wm_prog(struct brw_context *brw, prog_data.computed_depth_mode = computed_depth_mode(&fp->program); + prog_data.early_fragment_tests = fs && fs->EarlyFragmentTests; + /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */ if (!prog) prog_data.base.use_alt_mode = true; diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index 60d58c5..6087968 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -102,6 +102,12 @@ upload_wm_state(struct brw_context *brw) dw1 |= GEN7_WM_USES_INPUT_COVERAGE_MASK; } + /* BRW_NEW_FS_PROG_DATA */ + if (prog_data->early_fragment_tests) + dw1 |= GEN7_WM_EARLY_DS_CONTROL_PREPS; + else if (brw->wm.base.nr_image_params) + dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC; + /* _NEW_BUFFERS | _NEW_COLOR */ if (brw->is_haswell && !(brw_color_buffer_write_enabled(brw) || writes_depth) && diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c index e428089..7250e28 100644 --- a/src/mesa/drivers/dri/i965/gen8_depth_state.c +++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c @@ -241,10 +241,10 @@ pma_fix_enable(const struct brw_context *brw) */ const bool hiz_enabled = depth_irb && intel_renderbuffer_has_hiz(depth_irb); - /* 3DSTATE_WM::Early Depth/Stencil Control != EDSC_PREPS (2). -* We always leave this set to EDSC_NORMAL (0). + /* BRW_NEW_FS_PROG_DATA: +* 3DSTATE_WM::Early Depth/Stencil Control != EDSC_PREPS (2). */ - const bool edsc_not_preps = true; + const bool edsc_not_preps = !brw->wm.prog_data->early_fragment_tests; /* 3DSTATE_PS_EXTRA::PixelShaderValid is always true. */ const bool pixel_shader_valid = true; diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index b932437..fdab9a3 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -105,6 +105,12 @@ upload_wm_state(struct brw_context *brw) dw1 |= brw->wm.prog_data->barycentric_interp_modes << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; + /* BRW_NEW_FS_PROG_DATA */ + if (brw->wm.prog_data->early_fragment_tests) + dw1 |= GEN7_WM_EARLY_DS_CONTROL_PREPS; + else if (brw->wm.base.nr_image_params) + dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC; + BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_WM << 16 | (2 - 2)); OUT_BATCH(dw1); -- 2.1.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 08/32] i965/fs: Fix stack allocation of fs_inst and stop stealing src array provided on construction.
Sounds well reasoned to me. Reviewed-by: Matt Turner ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] gallium/util: Define ffsll on OpenBSD.
On 09/02/15 16:59, Jon TURNEY wrote: On 06/02/2015 19:58, Matt Turner wrote: On Fri, Feb 6, 2015 at 3:38 AM, Jonathan Gray wrote: OpenBSD has ffs in libc but does not have ffsll so use the compiler builtin. PIPE_OS_BSD isn't suitable here as FreeBSD has ffsll in libc. Signed-off-by: Jonathan Gray --- src/gallium/auxiliary/util/u_math.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 5db5b66..ec282f3 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -531,6 +531,8 @@ unsigned ffs( unsigned u ) #elif defined(__MINGW32__) || defined(PIPE_OS_ANDROID) #define ffs __builtin_ffs #define ffsll __builtin_ffsll +#elif defined(__OpenBSD__) +#define ffsll __builtin_ffsll #endif Autoconf checks for presence of a bunch of builtins. Please use those instead (in this case, HAVE___BUILTIN_FFSLL). Yes, please. This has just been 'fixed' for MinGW, now for OpenBSD, and also needs fixing for Cygwin. Attached is a patch which attempts to do this using autoconf checks. The issue is that this will break scons builds unless these checks are replicated there. And SCons implementation of configure checks are not great to be honest -- they either are cached (but in such way were multiple builds from same source tree pick up wrong values) or they need to be re-checked on every build (wasting time for incremental builds). This is why, within reason, I personally like to avoid configure checks when practical. So for now I'd prefer to leave MinGW 'fixed' as you put it. But fell free to fix the other platforms as you propose. BTW, isn't there any standard include that defines ffsll as macro or inline on top of __builtin_ffsll for systems that support it? Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 01/32] i965: Factor out virtual GRF allocation to a separate object.
On Fri, Feb 6, 2015 at 2:40 PM, Matt Turner wrote: > 8 - Sent a question > 9 - Like mine better? > 10 - Looks wrong to me > 11-13 - Asked Jason to review > 14 - Asked for an example showing the problem > 15-16 - R-b > 17-18 - R-b - but wow, did we really hit these in practice? > 19-20 - R-b > 21 - Sent a question > 22-25 - R-b 8, 9, and 21 are R-b ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCHv2 32/32] i965: Don't compact instructions with unmapped bits.
Matt Turner writes: > On Mon, Feb 9, 2015 at 6:08 AM, Francisco Jerez wrote: >> Some instruction bits don't have a mapping defined to any compacted >> instruction field. If they're ever set and we end up compacting the >> instruction they will be forced to zero. Avoid using compaction in such >> cases. >> >> v2: Align multiple lines of an expression to the same column. Change >> conditional compaction of 3-source instructions to an >> assertion. (Matt) >> --- >> src/mesa/drivers/dri/i965/brw_eu_compact.c | 46 >> ++ >> 1 file changed, 46 insertions(+) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c >> b/src/mesa/drivers/dri/i965/brw_eu_compact.c >> index 8e33bcb..f80bcc1 100644 >> --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c >> +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c >> @@ -843,10 +843,53 @@ set_3src_source_index(struct brw_context *brw, >> brw_compact_inst *dst, brw_inst * >> } >> >> static bool >> +has_unmapped_bits(struct brw_context *brw, brw_inst *src) >> +{ >> + /* Check for instruction bits that don't map to any of the fields of the >> +* compacted instruction. The instruction cannot be compacted if any of >> +* them are set. They overlap with: >> +* - NibCtrl (bit 47 on Gen7, bit 11 on Gen8) >> +* - Dst.AddrImm[9] (bit 47 on Gen8) >> +* - Src0.AddrImm[9] (bit 95 on Gen8) >> +*/ >> + if (brw->gen >= 8) >> + return brw_inst_bits(src, 95, 95) || >> + brw_inst_bits(src, 47, 47) || >> + brw_inst_bits(src, 11, 11) || >> + brw_inst_bits(src, 7, 7); > > 11 (NibCtrl) is the only one that isn't reserved. I'd rather assert > that reserved bits are not set. > No, bits 47 and 95 are valid too and part of the AddrImm fields. >> + else >> + return brw_inst_bits(src, 95, 91) || >> + brw_inst_bits(src, 47, 47) || >> + brw_inst_bits(src, 7, 7) || >> + (brw->gen < 7 && brw_inst_bits(src, 90, 90)); > > Same thing, 47 (NibCtrl) is the only non-reserved bit. I'd rather > assert about the others. > >> +} >> + >> +static bool >> +has_3src_unmapped_bits(struct brw_context *brw, brw_inst *src) >> +{ >> + /* Check for three-source instruction bits that don't map to any of the >> +* fields of the compacted instruction. All of them seem to be reserved >> +* bits currently. >> +*/ >> + assert(brw->gen >= 8); >> + if (brw->gen >= 9 || brw->is_cherryview) >> + return brw_inst_bits(src, 127, 127) || >> + brw_inst_bits(src, 105, 105) || > > 105 is Src1's SubRegNum[word] on CHV and is included in SourceIndex. > Ahh, that's true, good catch. >> + brw_inst_bits(src, 7, 7); >> + else >> + return brw_inst_bits(src, 127, 126) || >> + brw_inst_bits(src, 105, 105) || >> + brw_inst_bits(src, 84, 84) || >> + brw_inst_bits(src, 36, 35) || >> + brw_inst_bits(src, 7, 7); > > Since bit 7 is reserved in all cases, we might just > assert(brw_inst_bits(src, 7, 7) == 0) in > brw_try_compact_instruction(). I don't have a preference. > > So, I think it'd be nice to differentiate between bits that aren't > included in compact instructions and reserved bits. > > (Sorry, I could have given you this feedback in the first time around) pgp3QKojZ224p.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 11/12] nir: Add a global code motion (GCM) pass
So last time you posted this, I had some suggestions but I realized that in light of the fact that we want to do value numbering, it didn't seem like my suggestions were any good. But now that I've thought about it a little bit, it seems to me like a better plan might be to pull instructions out of their basic blocks and into the global list of instructions earlier. That is, instead of marking instructions as pinned in gcm_pin_instructions_block(), put the ones that aren't pinned in state->instrs and then instead of walking over all the basic blocks and bailing on pinned instructions when scheduling early and late, just walk over state->instrs. This should be faster, and when we do value-numbering there's a natural place to do it, i.e. after pulling instructions but before scheduling -- this means we only value-number the non-pinned instructions, which is what we want since we're not coalescing the pinned instructions in this pass. This is also more similar to how it's presented in the paper (although our scheduling algorithm will still be different I guess). On Mon, Feb 9, 2015 at 3:19 AM, Jason Ekstrand wrote: > v2 Jason Ekstrand : > - Use nir_dominance_lca for computing least common anscestors > - Use the block index for comparing dominance tree depths > - Pin things that do partial derivatives > --- > src/glsl/Makefile.sources | 1 + > src/glsl/nir/nir.h | 2 + > src/glsl/nir/nir_opt_gcm.c | 501 > + > 3 files changed, 504 insertions(+) > create mode 100644 src/glsl/nir/nir_opt_gcm.c > > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources > index a580b6e..69cb2e6 100644 > --- a/src/glsl/Makefile.sources > +++ b/src/glsl/Makefile.sources > @@ -43,6 +43,7 @@ NIR_FILES = \ > nir/nir_opt_copy_propagate.c \ > nir/nir_opt_cse.c \ > nir/nir_opt_dce.c \ > + nir/nir_opt_gcm.c \ > nir/nir_opt_global_to_local.c \ > nir/nir_opt_peephole_select.c \ > nir/nir_opt_remove_phis.c \ > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > index 90a7001..55fb43d 100644 > --- a/src/glsl/nir/nir.h > +++ b/src/glsl/nir/nir.h > @@ -1589,6 +1589,8 @@ bool nir_opt_cse(nir_shader *shader); > bool nir_opt_dce_impl(nir_function_impl *impl); > bool nir_opt_dce(nir_shader *shader); > > +void nir_opt_gcm(nir_shader *shader); > + > bool nir_opt_peephole_select(nir_shader *shader); > bool nir_opt_peephole_ffma(nir_shader *shader); > > diff --git a/src/glsl/nir/nir_opt_gcm.c b/src/glsl/nir/nir_opt_gcm.c > new file mode 100644 > index 000..d48518b > --- /dev/null > +++ b/src/glsl/nir/nir_opt_gcm.c > @@ -0,0 +1,501 @@ > +/* > + * Copyright © 2014 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + * > + * Authors: > + *Jason Ekstrand (ja...@jlekstrand.net) > + * > + */ > + > +#include "nir.h" > + > +/* > + * Implements Global Code Motion. A description of GCM can be found in > + * "Global Code Motion; Global Value Numbering" by Cliff Click. > + * Unfortunately, the algorithm presented in the paper is broken in a > + * number of ways. The algorithm used here differs substantially from the > + * one in the paper but it is, in my opinion, much easier to read and > + * verify correcness. > + */ > + > +struct gcm_block_info { > + /* Number of loops this block is inside */ > + unsigned loop_depth; > + > + /* The last instruction inserted into this block. This is used as we > +* traverse the instructions and insert them back into the program to > +* put them in the right order. > +*/ > + nir_instr *last_instr; > +}; > + > +struct gcm_state { > + nir_function_impl *impl; > + nir_instr *instr; > + > + /* Marks all instructions that have been visited by the curren pass */ > + BITSET_WORD *visited; > + > + /* Marks instructions that are "pinned", i.e. can
[Mesa-dev] [PATCHv3 32/32] i965: Don't compact instructions with unmapped bits.
Some instruction bits don't have a mapping defined to any compacted instruction field. If they're ever set and we end up compacting the instruction they will be forced to zero. Avoid using compaction in such cases. v2: Align multiple lines of an expression to the same column. Change conditional compaction of 3-source instructions to an assertion. (Matt) v3: The 3-source instruction bit 105 is part of SourceIndex on CHV. Add assertion that reserved bit 7 is not set. (Matt) Document overlap with UIP and 64-bit immediate fields. --- src/mesa/drivers/dri/i965/brw_eu_compact.c | 48 ++ 1 file changed, 48 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index 8e33bcb..cb93636 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -843,10 +843,54 @@ set_3src_source_index(struct brw_context *brw, brw_compact_inst *dst, brw_inst * } static bool +has_unmapped_bits(struct brw_context *brw, brw_inst *src) +{ + /* Check for instruction bits that don't map to any of the fields of the +* compacted instruction. The instruction cannot be compacted if any of +* them are set. They overlap with: +* - NibCtrl (bit 47 on Gen7, bit 11 on Gen8) +* - Dst.AddrImm[9] (bit 47 on Gen8) +* - Src0.AddrImm[9] (bit 95 on Gen8) +* - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8) +* - UIP[31] (bit 95 on Gen8) +*/ + if (brw->gen >= 8) + return brw_inst_bits(src, 95, 95) || + brw_inst_bits(src, 47, 47) || + brw_inst_bits(src, 11, 11) || + brw_inst_bits(src, 7, 7); + else + return brw_inst_bits(src, 95, 91) || + brw_inst_bits(src, 47, 47) || + brw_inst_bits(src, 7, 7) || + (brw->gen < 7 && brw_inst_bits(src, 90, 90)); +} + +static bool +has_3src_unmapped_bits(struct brw_context *brw, brw_inst *src) +{ + /* Check for three-source instruction bits that don't map to any of the +* fields of the compacted instruction. All of them seem to be reserved +* bits currently. +*/ + assert(brw->gen >= 8); + if (brw->gen >= 9 || brw->is_cherryview) + return brw_inst_bits(src, 127, 127) || + brw_inst_bits(src, 7, 7); + else + return brw_inst_bits(src, 127, 126) || + brw_inst_bits(src, 105, 105) || + brw_inst_bits(src, 84, 84) || + brw_inst_bits(src, 36, 35) || + brw_inst_bits(src, 7, 7); +} + +static bool brw_try_compact_3src_instruction(struct brw_context *brw, brw_compact_inst *dst, brw_inst *src) { assert(brw->gen >= 8); + assert(!has_3src_unmapped_bits(brw, src)); #define compact(field) \ brw_compact_inst_set_3src_##field(dst, brw_inst_3src_##field(brw, src)) @@ -914,6 +958,7 @@ brw_try_compact_instruction(struct brw_context *brw, brw_compact_inst *dst, brw_compact_inst temp; assert(brw_inst_cmpt_control(brw, src) == 0); + assert(brw_inst_bits(src, 7, 7) == 0); /* reserved bit */ if (is_3src(brw_inst_opcode(brw, src))) { if (brw->gen >= 8) { @@ -937,6 +982,9 @@ brw_try_compact_instruction(struct brw_context *brw, brw_compact_inst *dst, return false; } + if (has_unmapped_bits(brw, src)) + return false; + memset(&temp, 0, sizeof(temp)); brw_compact_inst_set_opcode(&temp, brw_inst_opcode(brw, src)); -- 2.1.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] mesa-10.4.4: BROKEN TLS support in GLX with llvm-toolchain v3.6.0rc2
On 09/02/15 17:44, Emil Velikov wrote: Hi Sedat, On 07/02/15 22:42, Sedat Dilek wrote: [ Please CC me I am not subscribed to mesa-dev and llvmdev MLs ] Hi, I already reported this when playing 1st time with my llvm-toolchain v3.6.0rc2 and mesa v10.3.7 [1]. The issue still remains in mesa v10.4.4. So, this is a field test to see if LLVM/Clang v3.6.0rc2 fits my needs. I see the following build-error... ... make[4]: Entering directory `/home/wearefam/src/mesa/mesa-git/src/mapi' CC shared_glapi_libglapi_la-entry.lo clang version 3.6.0 (tags/RELEASE_360/rc2) Target: x86_64-unknown-linux-gnu Thread model: posix Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.6 Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.6.4 Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9 Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9.2 Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9 Candidate multilib: .;@m64 Candidate multilib: 32;@m32 Selected multilib: .;@m64 "/opt/llvm-toolchain-3.6.0rc2/bin/clang" -cc1 -triple x86_64-unknown-linux-gnu -emit-obj -mrelax-all -disable-free -main-file-name entry.c -mrelocation-model static -mthread-model posix -mdisable-fp-elim -relaxed-aliasing -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -target-linker-version 2.22 -v -g -dwarf-column-info -coverage-file /home/wearefam/src/mesa/mesa-git/src/mapi/entry.c -resource-dir /opt/llvm-toolchain-3.6.0rc2/bin/../lib/clang/3.6.0 -dependency-file .deps/shared_glapi_libglapi_la-entry.Tpo -sys-header-deps -MP -MT shared_glapi_libglapi_la-entry.lo -D "PACKAGE_NAME=\"Mesa\"" -D "PACKAGE_TARNAME=\"mesa\"" -D "PACKAGE_VERSION=\"10.4.4\"" -D "PACKAGE_STRING=\"Mesa 10.4.4\"" -D "PACKAGE_BUGREPORT=\"https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_enter-5Fbug.cgi-3Fproduct-3DMesa&d=AwID-g&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=zfmBZnnVGHeYde45pMKNnVyzeaZbdIqVLprmZCM2zzE&m=LdbG9btkWrIw7ABhWDTiHtGwFnB7cDCY6cHlnzXawlQ&s=sH_YjhwwusfBRXm5X-z7LRFkCz68ItpSfCmRnHJAYkw&e= \"" -D "PACKAGE_URL=\"\"" -D "PACKAGE=\"mesa\"" -D "VERSION=\"10.4.4\"" -D STDC_HEADERS=1 -D HAVE_SYS_TYPES_H=1 -D HAVE_SYS_STAT_H=1 -D HAVE_STDLIB_H=1 -D HAVE_STRING_H=1 -D HAVE_MEMORY_H=1 -D HAVE_STRINGS_H=1 -D HAVE_INTTYPES_H=1 -D HAVE_STDINT_H=1 -D HAVE_UNISTD_H=1 -D HAVE_DLFCN_H=1 -D "LT_OBJDIR=\".libs/\"" -D YYTEXT_POINTER=1 -D HAVE___BUILTIN_BSWAP32=1 -D HAVE___BUILTIN_BSWAP64=1 -D HAVE___BUILTIN_CLZ=1 -D HAVE___BUILTIN_CLZLL=1 -D HAVE___BUILTIN_CTZ=1 -D HAVE___BUILTIN_EXPECT=1 -D HAVE___BUILTIN_FFS=1 -D HAVE___BUILTIN_FFSLL=1 -D HAVE___BUILTIN_POPCOUNT=1 -D HAVE___BUILTIN_POPCOUNTLL=1 -D HAVE___BUILTIN_UNREACHABLE=1 -D HAVE_DLADDR=1 -D HAVE_PTHREAD=1 -D HAVE_LIBEXPAT=1 -D USE_EXTERNAL_DXTN_LIB=1 -D _GNU_SOURCE -D USE_SSE41 -D DEBUG -D USE_X86_64_ASM -D HAVE_XLOCALE_H -D HAVE_STRTOF -D HAVE_DLOPEN -D HAVE_POSIX_MEMALIGN -D HAVE_LIBDRM -D GLX_USE_DRM -D HAVE_LIBUDEV -D GLX_INDIRECT_RENDERING -D GLX_DIRECT_RENDERING -D GLX_USE_TLS -D HAVE_ALIAS -D HAVE_MINCORE -D HAVE_LLVM=0x0306 -D LLVM_VERSION_PATCH=0 -D MAPI_MODE_GLAPI -D "MAPI_ABI_HEADER=\"shared-glapi/glapi_mapi_tmp.h\"" -I . -I ../../include -I ../../src/mapi -I ../../src/mapi -I /opt/xorg/include -internal-isystem /usr/local/include -internal-isystem /opt/llvm-toolchain-3.6.0rc2/bin/../lib/clang/3.6.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O0 -Wall -Werror=implicit-function-declaration -Werror=missing-prototypes -std=c99 -fdebug-compilation-dir /home/wearefam/src/mesa/mesa-git/src/mapi -ferror-limit 19 -fmessage-length 0 -pthread -mstackrealign -fobjc-runtime=gcc -fdiagnostics-show-option -o entry.o -x c ../../src/mapi/entry.c clang -cc1 version 3.6.0 based upon LLVM 3.6.0 default target x86_64-unknown-linux-gnu ignoring nonexistent directory "/include" ignoring duplicate directory "." ignoring duplicate directory "." #include "..." search starts here: #include <...> search starts here: . ../../include /opt/xorg/include /usr/local/include /opt/llvm-toolchain-3.6.0rc2/bin/../lib/clang/3.6.0/include /usr/include/x86_64-linux-gnu /usr/include End of search list. In file included from ../../src/mapi/entry.c:49: ./entry_x86-64_tls.h:66:1: warning: tentative array definition assumed to have one element x86_64_entry_start[]; ^ fatal error: error in backend: symbol 'x86_64_entry_start' is already defined clang: error: clang frontend command failed with exit code 70 (use -v to see invocation) clang version 3.6.0 (tags/RELEASE_360/rc2) Target: x86_64-unknown-linux-gnu Thread model: posix clang: note: diagnostic msg: PLEASE submit a bug report to https://urldefense.proofpoint.com/v2/url?u=http-3A__llvm.org_bugs_&d=AwID-g&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=zfmBZnnVGHeYde45pMKNnVyzeaZbdIqVLprmZCM2zzE&m=LdbG9btkWrIw7ABhWDTiHtGwFnB7cDCY6cHlnzXawlQ&s=
Re: [Mesa-dev] [PATCH] mesa: Bump MAX_IMAGE_UNIFORMS to 32.
On Monday, February 09, 2015 06:37:45 PM Francisco Jerez wrote: > So the i965 driver can expose 32 image uniforms per shader stage. > --- > src/mesa/main/config.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h > index 4ec4b75..08e1a14 100644 > --- a/src/mesa/main/config.h > +++ b/src/mesa/main/config.h > @@ -178,7 +178,7 @@ > #define MAX_COMBINED_ATOMIC_BUFFERS(MAX_UNIFORM_BUFFERS * 6) > /* Size of an atomic counter in bytes according to > ARB_shader_atomic_counters */ > #define ATOMIC_COUNTER_SIZE4 > -#define MAX_IMAGE_UNIFORMS 16 > +#define MAX_IMAGE_UNIFORMS 32 > /* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */ > #define MAX_IMAGE_UNITS(MAX_IMAGE_UNIFORMS * 6) > /*@}*/ > Reviewed-by: Kenneth Graunke signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Don't tile 1D miptrees.
On Monday, February 09, 2015 07:15:28 PM Francisco Jerez wrote: > It doesn't really improve locality of texture fetches, quite the > opposite it's a waste of memory bandwidth and space due to tile > alignment. > > v2: Check mt->logical_height0 instead of mt->target (Ken). Add short > comment explaining why they shouldn't be tiled. > --- > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 7 +++ > 1 file changed, 7 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > index 64752dd..0e3888f 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > @@ -488,6 +488,13 @@ intel_miptree_choose_tiling(struct brw_context *brw, > base_format == GL_DEPTH_STENCIL_EXT) >return I915_TILING_Y; > > + /* 1D textures (and 1D array textures) don't get any benefit from tiling, > +* in fact it leads to a less efficient use of memory space and bandwidth > +* due to tile alignment. > +*/ > + if (mt->logical_height0 == 1) > + return I915_TILING_NONE; > + > int minimum_pitch = mt->total_width * mt->cpp; > > /* If the width is much smaller than a tile, don't bother tiling. */ > Reviewed-by: Kenneth Graunke signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965/skl: Implement WaDisable1DDepthStencil
On Monday, February 09, 2015 05:57:20 PM Neil Roberts wrote: > Skylake+ doesn't support setting a depth buffer to a 1D surface but it > does allow pretending it's a 2D texture with a height of 1 instead. > > This fixes the GL_DEPTH_COMPONENT_* tests of the copyteximage piglit > test (and also seems to avoid a subsequent GPU hang). > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89037 > --- > This probably only makes sense on top of Francisco Jerez's patch here: > > http://lists.freedesktop.org/archives/mesa-dev/2015-February/076392.html > > The copyteximage test is still failing with some other formats such as > GL_RGB16 and some intensity and luminance formats but I'm just looking > into that now. > > src/mesa/drivers/dri/i965/gen8_depth_state.c | 12 > 1 file changed, 12 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c > b/src/mesa/drivers/dri/i965/gen8_depth_state.c > index e428089..b4eb6e1 100644 > --- a/src/mesa/drivers/dri/i965/gen8_depth_state.c > +++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c > @@ -190,6 +190,18 @@ gen8_emit_depth_stencil_hiz(struct brw_context *brw, > case GL_TEXTURE_3D: >assert(mt); >depth = MAX2(mt->logical_depth0, 1); > + surftype = translate_tex_target(gl_target); > + break; > + case GL_TEXTURE_1D_ARRAY: > + case GL_TEXTURE_1D: > + if (brw->gen >= 9) { > + /* WaDisable1DDepthStencil. Skylake+ doesn't support 1D depth > + * textures but it does allow pretending it's a 2D texture > + * instead. > + */ > + surftype = BRW_SURFACE_2D; > + break; > + } >/* fallthrough */ > default: >surftype = translate_tex_target(gl_target); > Reviewed-by: Kenneth Graunke signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCHv3 32/32] i965: Don't compact instructions with unmapped bits.
On Mon, Feb 9, 2015 at 12:26 PM, Francisco Jerez wrote: > Some instruction bits don't have a mapping defined to any compacted > instruction field. If they're ever set and we end up compacting the > instruction they will be forced to zero. Avoid using compaction in such > cases. > > v2: Align multiple lines of an expression to the same column. Change > conditional compaction of 3-source instructions to an > assertion. (Matt) > v3: The 3-source instruction bit 105 is part of SourceIndex on CHV. > Add assertion that reserved bit 7 is not set. (Matt) > Document overlap with UIP and 64-bit immediate fields. > --- > src/mesa/drivers/dri/i965/brw_eu_compact.c | 48 > ++ > 1 file changed, 48 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c > b/src/mesa/drivers/dri/i965/brw_eu_compact.c > index 8e33bcb..cb93636 100644 > --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c > +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c > @@ -843,10 +843,54 @@ set_3src_source_index(struct brw_context *brw, > brw_compact_inst *dst, brw_inst * > } > > static bool > +has_unmapped_bits(struct brw_context *brw, brw_inst *src) > +{ > + /* Check for instruction bits that don't map to any of the fields of the > +* compacted instruction. The instruction cannot be compacted if any of > +* them are set. They overlap with: > +* - NibCtrl (bit 47 on Gen7, bit 11 on Gen8) > +* - Dst.AddrImm[9] (bit 47 on Gen8) > +* - Src0.AddrImm[9] (bit 95 on Gen8) > +* - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8) > +* - UIP[31] (bit 95 on Gen8) > +*/ > + if (brw->gen >= 8) > + return brw_inst_bits(src, 95, 95) || > + brw_inst_bits(src, 47, 47) || > + brw_inst_bits(src, 11, 11) || > + brw_inst_bits(src, 7, 7); > + else > + return brw_inst_bits(src, 95, 91) || > + brw_inst_bits(src, 47, 47) || > + brw_inst_bits(src, 7, 7) || > + (brw->gen < 7 && brw_inst_bits(src, 90, 90)); > +} > + > +static bool > +has_3src_unmapped_bits(struct brw_context *brw, brw_inst *src) > +{ > + /* Check for three-source instruction bits that don't map to any of the > +* fields of the compacted instruction. All of them seem to be reserved > +* bits currently. > +*/ > + assert(brw->gen >= 8); > + if (brw->gen >= 9 || brw->is_cherryview) > + return brw_inst_bits(src, 127, 127) || > + brw_inst_bits(src, 7, 7); > + else > + return brw_inst_bits(src, 127, 126) || > + brw_inst_bits(src, 105, 105) || > + brw_inst_bits(src, 84, 84) || > + brw_inst_bits(src, 36, 35) || > + brw_inst_bits(src, 7, 7); Thanks for adding the assertion for bit 7. Do you think it's valuable to check it here as well? I wouldn't think it was. What I'm suggesting is that we only do assertions about reserved bits. If they're set, we'll trigger the assertion only in debug builds, but I think that's sufficient. I think we should only check useful bits here (i.e., non-reserverd bits in the uncompacted instruction that don't exist in the compacted instruction). I think effectively that means removing the checks for bit 7, all of the bits in the 3-src BDW case (always return false), and 90 on gen < 7. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nir: Mark nir_print_instr's instr pointer as const.
Printing instructions doesn't modify them, so we can mark the parameter const. --- src/glsl/nir/nir.h | 2 +- src/glsl/nir/nir_print.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 4cb2e92..ceda977 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1489,7 +1489,7 @@ void nir_index_ssa_defs(nir_function_impl *impl); void nir_index_blocks(nir_function_impl *impl); void nir_print_shader(nir_shader *shader, FILE *fp); -void nir_print_instr(nir_instr *instr, FILE *fp); +void nir_print_instr(const nir_instr *instr, FILE *fp); #ifdef DEBUG void nir_validate_shader(nir_shader *shader); diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index 9c07950..6a3c6a0 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -576,7 +576,7 @@ print_parallel_copy_instr(nir_parallel_copy_instr *instr, FILE *fp) } static void -print_instr(nir_instr *instr, print_var_state *state, unsigned tabs, FILE *fp) +print_instr(const nir_instr *instr, print_var_state *state, unsigned tabs, FILE *fp) { print_tabs(tabs, fp); @@ -882,7 +882,7 @@ nir_print_shader(nir_shader *shader, FILE *fp) } void -nir_print_instr(nir_instr *instr, FILE *fp) +nir_print_instr(const nir_instr *instr, FILE *fp) { print_instr(instr, NULL, 0, fp); } -- 2.2.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nir: Mark nir_print_instr's instr pointer as const.
Reviewed-by: Jason Ekstrand On Mon, Feb 9, 2015 at 4:42 PM, Kenneth Graunke wrote: > Printing instructions doesn't modify them, so we can mark the parameter > const. > --- > src/glsl/nir/nir.h | 2 +- > src/glsl/nir/nir_print.c | 4 ++-- > 2 files changed, 3 insertions(+), 3 deletions(-) > > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > index 4cb2e92..ceda977 100644 > --- a/src/glsl/nir/nir.h > +++ b/src/glsl/nir/nir.h > @@ -1489,7 +1489,7 @@ void nir_index_ssa_defs(nir_function_impl *impl); > void nir_index_blocks(nir_function_impl *impl); > > void nir_print_shader(nir_shader *shader, FILE *fp); > -void nir_print_instr(nir_instr *instr, FILE *fp); > +void nir_print_instr(const nir_instr *instr, FILE *fp); > > #ifdef DEBUG > void nir_validate_shader(nir_shader *shader); > diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c > index 9c07950..6a3c6a0 100644 > --- a/src/glsl/nir/nir_print.c > +++ b/src/glsl/nir/nir_print.c > @@ -576,7 +576,7 @@ print_parallel_copy_instr(nir_parallel_copy_instr > *instr, FILE *fp) > } > > static void > -print_instr(nir_instr *instr, print_var_state *state, unsigned tabs, FILE > *fp) > +print_instr(const nir_instr *instr, print_var_state *state, unsigned > tabs, FILE *fp) > { > print_tabs(tabs, fp); > > @@ -882,7 +882,7 @@ nir_print_shader(nir_shader *shader, FILE *fp) > } > > void > -nir_print_instr(nir_instr *instr, FILE *fp) > +nir_print_instr(const nir_instr *instr, FILE *fp) > { > print_instr(instr, NULL, 0, fp); > } > -- > 2.2.2 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nir: Fix logic error.
Sorry I didn't get to this earlier. It got lost in the backlog. Reviewed-by: Jason Ekstrand Out of curiosity, are there any shader-db results for this? If not, that's ok. --Jason On Wed, Feb 4, 2015 at 3:19 PM, Matt Turner wrote: > --- > I don't know if this is right, but what we had before was definitely > wrong. (And gcc warned about it!) > > src/glsl/nir/nir_lower_phis_to_scalar.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c > b/src/glsl/nir/nir_lower_phis_to_scalar.c > index 3bb5cc7..7c2f539 100644 > --- a/src/glsl/nir/nir_lower_phis_to_scalar.c > +++ b/src/glsl/nir/nir_lower_phis_to_scalar.c > @@ -65,9 +65,9 @@ is_phi_src_scalarizable(nir_phi_src *src, > * are ok too. > */ >return nir_op_infos[src_alu->op].output_size == 0 || > - src_alu->op != nir_op_vec2 || > - src_alu->op != nir_op_vec3 || > - src_alu->op != nir_op_vec4; > + (src_alu->op != nir_op_vec2 && > + src_alu->op != nir_op_vec3 && > + src_alu->op != nir_op_vec4); > } > > case nir_instr_type_phi: > -- > 2.0.4 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nir: Fix logic error.
On Mon, Feb 9, 2015 at 1:45 PM, Jason Ekstrand wrote: > Sorry I didn't get to this earlier. It got lost in the backlog. > > Reviewed-by: Jason Ekstrand > > Out of curiosity, are there any shader-db results for this? If not, that's > ok. I didn't even know if this the correct fix, so no shader-db numbers. :) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 11/12] nir: Add a global code motion (GCM) pass
On Mon, Feb 9, 2015 at 3:01 PM, Connor Abbott wrote: > So last time you posted this, I had some suggestions but I realized > that in light of the fact that we want to do value numbering, it > didn't seem like my suggestions were any good. But now that I've > thought about it a little bit, it seems to me like a better plan might > be to pull instructions out of their basic blocks and into the global > list of instructions earlier. That is, instead of marking instructions > as pinned in gcm_pin_instructions_block(), put the ones that aren't > pinned in state->instrs and then instead of walking over all the basic > blocks and bailing on pinned instructions when scheduling early and > late, just walk over state->instrs. This should be faster, and when we > do value-numbering there's a natural place to do it, i.e. after > pulling instructions but before scheduling -- this means we only > value-number the non-pinned instructions, which is what we want since > we're not coalescing the pinned instructions in this pass. This is > also more similar to how it's presented in the paper (although our > scheduling algorithm will still be different I guess). > > On Mon, Feb 9, 2015 at 3:19 AM, Jason Ekstrand > wrote: > > v2 Jason Ekstrand : > > - Use nir_dominance_lca for computing least common anscestors > > - Use the block index for comparing dominance tree depths > > - Pin things that do partial derivatives > > --- > > src/glsl/Makefile.sources | 1 + > > src/glsl/nir/nir.h | 2 + > > src/glsl/nir/nir_opt_gcm.c | 501 > + > > 3 files changed, 504 insertions(+) > > create mode 100644 src/glsl/nir/nir_opt_gcm.c > > > > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources > > index a580b6e..69cb2e6 100644 > > --- a/src/glsl/Makefile.sources > > +++ b/src/glsl/Makefile.sources > > @@ -43,6 +43,7 @@ NIR_FILES = \ > > nir/nir_opt_copy_propagate.c \ > > nir/nir_opt_cse.c \ > > nir/nir_opt_dce.c \ > > + nir/nir_opt_gcm.c \ > > nir/nir_opt_global_to_local.c \ > > nir/nir_opt_peephole_select.c \ > > nir/nir_opt_remove_phis.c \ > > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > > index 90a7001..55fb43d 100644 > > --- a/src/glsl/nir/nir.h > > +++ b/src/glsl/nir/nir.h > > @@ -1589,6 +1589,8 @@ bool nir_opt_cse(nir_shader *shader); > > bool nir_opt_dce_impl(nir_function_impl *impl); > > bool nir_opt_dce(nir_shader *shader); > > > > +void nir_opt_gcm(nir_shader *shader); > > + > > bool nir_opt_peephole_select(nir_shader *shader); > > bool nir_opt_peephole_ffma(nir_shader *shader); > > > > diff --git a/src/glsl/nir/nir_opt_gcm.c b/src/glsl/nir/nir_opt_gcm.c > > new file mode 100644 > > index 000..d48518b > > --- /dev/null > > +++ b/src/glsl/nir/nir_opt_gcm.c > > @@ -0,0 +1,501 @@ > > +/* > > + * Copyright © 2014 Intel Corporation > > + * > > + * Permission is hereby granted, free of charge, to any person > obtaining a > > + * copy of this software and associated documentation files (the > "Software"), > > + * to deal in the Software without restriction, including without > limitation > > + * the rights to use, copy, modify, merge, publish, distribute, > sublicense, > > + * and/or sell copies of the Software, and to permit persons to whom the > > + * Software is furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice (including the > next > > + * paragraph) shall be included in all copies or substantial portions > of the > > + * Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT > SHALL > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR > OTHER > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > > + * IN THE SOFTWARE. > > + * > > + * Authors: > > + *Jason Ekstrand (ja...@jlekstrand.net) > > + * > > + */ > > + > > +#include "nir.h" > > + > > +/* > > + * Implements Global Code Motion. A description of GCM can be found in > > + * "Global Code Motion; Global Value Numbering" by Cliff Click. > > + * Unfortunately, the algorithm presented in the paper is broken in a > > + * number of ways. The algorithm used here differs substantially from > the > > + * one in the paper but it is, in my opinion, much easier to read and > > + * verify correcness. > > + */ > > + > > +struct gcm_block_info { > > + /* Number of loops this block is inside */ > > + unsigned loop_depth; > > + > > + /* The last instruction inserted into this block. This is used as we > > +* traverse the instructions and insert them back into the program to > > +* put them in the right order. > > +
Re: [Mesa-dev] [PATCH] nir: Fix logic error.
On Mon, Feb 9, 2015 at 4:54 PM, Matt Turner wrote: > On Mon, Feb 9, 2015 at 1:45 PM, Jason Ekstrand > wrote: > > Sorry I didn't get to this earlier. It got lost in the backlog. > > > > Reviewed-by: Jason Ekstrand > > > > Out of curiosity, are there any shader-db results for this? If not, > that's > > ok. > > I didn't even know if this the correct fix, so no shader-db numbers. :) > Wow... I need to learn to read... I think we actually want || and ==. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] nir: Fix logic error.
On Mon, Feb 9, 2015 at 2:06 PM, Jason Ekstrand wrote: > Wow... I need to learn to read... I think we actually want || and ==. Feel free to take it over. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 89043] undefined symbol: _glapi_tls_Dispatch
https://bugs.freedesktop.org/show_bug.cgi?id=89043 --- Comment #1 from Aditya Atluri --- Hi, I re-installed Ubuntu 14.04.1 and did not check "install updates" during installation. Install and Upgrade all the packages. sudo apt-get install freeglut3-dev build-essential libx11-dev libxmu-dev libxi-dev libgl1-mesa-glx libglu1-mesa libglu1-mesa-dev clone the mesa repo. sudo apt-get install bison flex python-mako x11proto-dri3-dev x11proto-present-dev libudev-dev libexpat1-dev expat llvm build it. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 89043] undefined symbol: _glapi_tls_Dispatch
https://bugs.freedesktop.org/show_bug.cgi?id=89043 Aditya Atluri changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |NOTABUG -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 00/10] i965/fs: Combine constants and unconditionally emit MADs
On Mon, Feb 9, 2015 at 7:05 AM, Eero Tamminen wrote: > Hi, > > On 02/05/2015 06:21 AM, Matt Turner wrote: >> >> total instructions in shared programs: 5895414 -> 5747578 (-2.51%) >> instructions in affected programs: 3618111 -> 3470275 (-4.09%) >> helped:20492 >> HURT: 4449 >> GAINED:54 >> LOST: 146 >> >> and with NIR, that already emits MADs always: >> >> total instructions in shared programs: 7992936 -> 7772474 (-2.76%) >> instructions in affected programs: 3738730 -> 3518268 (-5.90%) >> helped:22082 >> HURT: 3445 >> GAINED:70 >> LOST: 78 >> >> There are some particularly exciting individual improvements too: >> >> kerbal-space-program/667: FS SIMD8: 1024 -> 561 (-45.21%) >> kerbal-space-program/676: FS SIMD8: 1021 -> 558 (-45.35%) > > > From how old Kerbal version these shaders are from? They were added to shader-db in November, so that would be my guess. > I just noticed that recent Kerbal outputs large amount of register spill & > alloc failure messages with INTEL_DEBUG=perf, whereas output I've saved from > a last year Kerbal version, had much less of them. I don't have anything to offer about those changes, but Kerbal uses large arrays of immediate values that aren't marked as const and that they variably index. Ken and I talked about it, and making those uniforms would make out code significantly better. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 89043] undefined symbol: _glapi_tls_Dispatch
https://bugs.freedesktop.org/show_bug.cgi?id=89043 Aditya Atluri changed: What|Removed |Added Status|RESOLVED|REOPENED Resolution|NOTABUG |--- -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 89043] undefined symbol: _glapi_tls_Dispatch
https://bugs.freedesktop.org/show_bug.cgi?id=89043 --- Comment #2 from Aditya Atluri --- It came back! -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] nir/lower_phis_to_scalar: Fix some logic in is_phi_scalarizable
--- src/glsl/nir/nir_lower_phis_to_scalar.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c b/src/glsl/nir/nir_lower_phis_to_scalar.c index 3bb5cc7..7cd93ea 100644 --- a/src/glsl/nir/nir_lower_phis_to_scalar.c +++ b/src/glsl/nir/nir_lower_phis_to_scalar.c @@ -65,9 +65,9 @@ is_phi_src_scalarizable(nir_phi_src *src, * are ok too. */ return nir_op_infos[src_alu->op].output_size == 0 || - src_alu->op != nir_op_vec2 || - src_alu->op != nir_op_vec3 || - src_alu->op != nir_op_vec4; + src_alu->op == nir_op_vec2 || + src_alu->op == nir_op_vec3 || + src_alu->op == nir_op_vec4; } case nir_instr_type_phi: -- 2.2.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH v2 11/12] nir: Add a global code motion (GCM) pass
On Mon, Feb 9, 2015 at 5:04 PM, Jason Ekstrand wrote: > > > On Mon, Feb 9, 2015 at 3:01 PM, Connor Abbott wrote: >> >> So last time you posted this, I had some suggestions but I realized >> that in light of the fact that we want to do value numbering, it >> didn't seem like my suggestions were any good. But now that I've >> thought about it a little bit, it seems to me like a better plan might >> be to pull instructions out of their basic blocks and into the global >> list of instructions earlier. That is, instead of marking instructions >> as pinned in gcm_pin_instructions_block(), put the ones that aren't >> pinned in state->instrs and then instead of walking over all the basic >> blocks and bailing on pinned instructions when scheduling early and >> late, just walk over state->instrs. This should be faster, and when we >> do value-numbering there's a natural place to do it, i.e. after >> pulling instructions but before scheduling -- this means we only >> value-number the non-pinned instructions, which is what we want since >> we're not coalescing the pinned instructions in this pass. This is >> also more similar to how it's presented in the paper (although our >> scheduling algorithm will still be different I guess). >> >> On Mon, Feb 9, 2015 at 3:19 AM, Jason Ekstrand >> wrote: >> > v2 Jason Ekstrand : >> > - Use nir_dominance_lca for computing least common anscestors >> > - Use the block index for comparing dominance tree depths >> > - Pin things that do partial derivatives >> > --- >> > src/glsl/Makefile.sources | 1 + >> > src/glsl/nir/nir.h | 2 + >> > src/glsl/nir/nir_opt_gcm.c | 501 >> > + >> > 3 files changed, 504 insertions(+) >> > create mode 100644 src/glsl/nir/nir_opt_gcm.c >> > >> > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources >> > index a580b6e..69cb2e6 100644 >> > --- a/src/glsl/Makefile.sources >> > +++ b/src/glsl/Makefile.sources >> > @@ -43,6 +43,7 @@ NIR_FILES = \ >> > nir/nir_opt_copy_propagate.c \ >> > nir/nir_opt_cse.c \ >> > nir/nir_opt_dce.c \ >> > + nir/nir_opt_gcm.c \ >> > nir/nir_opt_global_to_local.c \ >> > nir/nir_opt_peephole_select.c \ >> > nir/nir_opt_remove_phis.c \ >> > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h >> > index 90a7001..55fb43d 100644 >> > --- a/src/glsl/nir/nir.h >> > +++ b/src/glsl/nir/nir.h >> > @@ -1589,6 +1589,8 @@ bool nir_opt_cse(nir_shader *shader); >> > bool nir_opt_dce_impl(nir_function_impl *impl); >> > bool nir_opt_dce(nir_shader *shader); >> > >> > +void nir_opt_gcm(nir_shader *shader); >> > + >> > bool nir_opt_peephole_select(nir_shader *shader); >> > bool nir_opt_peephole_ffma(nir_shader *shader); >> > >> > diff --git a/src/glsl/nir/nir_opt_gcm.c b/src/glsl/nir/nir_opt_gcm.c >> > new file mode 100644 >> > index 000..d48518b >> > --- /dev/null >> > +++ b/src/glsl/nir/nir_opt_gcm.c >> > @@ -0,0 +1,501 @@ >> > +/* >> > + * Copyright © 2014 Intel Corporation >> > + * >> > + * Permission is hereby granted, free of charge, to any person >> > obtaining a >> > + * copy of this software and associated documentation files (the >> > "Software"), >> > + * to deal in the Software without restriction, including without >> > limitation >> > + * the rights to use, copy, modify, merge, publish, distribute, >> > sublicense, >> > + * and/or sell copies of the Software, and to permit persons to whom >> > the >> > + * Software is furnished to do so, subject to the following conditions: >> > + * >> > + * The above copyright notice and this permission notice (including the >> > next >> > + * paragraph) shall be included in all copies or substantial portions >> > of the >> > + * Software. >> > + * >> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >> > EXPRESS OR >> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >> > MERCHANTABILITY, >> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT >> > SHALL >> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR >> > OTHER >> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >> > ARISING >> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER >> > DEALINGS >> > + * IN THE SOFTWARE. >> > + * >> > + * Authors: >> > + *Jason Ekstrand (ja...@jlekstrand.net) >> > + * >> > + */ >> > + >> > +#include "nir.h" >> > + >> > +/* >> > + * Implements Global Code Motion. A description of GCM can be found in >> > + * "Global Code Motion; Global Value Numbering" by Cliff Click. >> > + * Unfortunately, the algorithm presented in the paper is broken in a >> > + * number of ways. The algorithm used here differs substantially from >> > the >> > + * one in the paper but it is, in my opinion, much easier to read and >> > + * verify correcness. >> > + */ >> > + >> > +struct gcm_block_info { >> > + /* Number of loops this block is inside */ >> > + unsigned loop_dep
Re: [Mesa-dev] [PATCH v2 11/12] nir: Add a global code motion (GCM) pass
On Mon, Feb 9, 2015 at 12:01 PM, Connor Abbott wrote: > So last time you posted this, I had some suggestions but I realized > that in light of the fact that we want to do value numbering, it > didn't seem like my suggestions were any good. But now that I've > thought about it a little bit, it seems to me like a better plan might > be to pull instructions out of their basic blocks and into the global > list of instructions earlier. That is, instead of marking instructions > as pinned in gcm_pin_instructions_block(), put the ones that aren't > pinned in state->instrs and then instead of walking over all the basic > blocks and bailing on pinned instructions when scheduling early and > late, just walk over state->instrs. This should be faster, and when we > do value-numbering there's a natural place to do it, i.e. after > pulling instructions but before scheduling -- this means we only > value-number the non-pinned instructions, which is what we want since > we're not coalescing the pinned instructions in this pass. This is > also more similar to how it's presented in the paper (although our > scheduling algorithm will still be different I guess). > Sorry for not addressing this comment. A couple of things: 1) We need the "is this instruction pinned" metadata for other things than just not iterating over them so we need to stash it somehow anyway. 2) I think we want value numbering to be its own pass. We just won't run the validator in between that and GCM. These passes are tricky and complex enough that I don't want to make it even more confusing by saying "Oh, now we value-number". Obviously, GVN and GCM will have to be kept in sync so we don't combine two things that can't be code-motioned but that's not a big deal. As far as pulling them as we pin and then only ever walking over non-pinned instructions. I thought about it.. Then I didn't for some historical reason but I don't remember what that was anymore. We could go back and do that and it would be a bit fasater. I can do that if you'd like but I don't think it'll matter much. > > On Mon, Feb 9, 2015 at 3:19 AM, Jason Ekstrand > wrote: > > v2 Jason Ekstrand : > > - Use nir_dominance_lca for computing least common anscestors > > - Use the block index for comparing dominance tree depths > > - Pin things that do partial derivatives > > --- > > src/glsl/Makefile.sources | 1 + > > src/glsl/nir/nir.h | 2 + > > src/glsl/nir/nir_opt_gcm.c | 501 > + > > 3 files changed, 504 insertions(+) > > create mode 100644 src/glsl/nir/nir_opt_gcm.c > > > > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources > > index a580b6e..69cb2e6 100644 > > --- a/src/glsl/Makefile.sources > > +++ b/src/glsl/Makefile.sources > > @@ -43,6 +43,7 @@ NIR_FILES = \ > > nir/nir_opt_copy_propagate.c \ > > nir/nir_opt_cse.c \ > > nir/nir_opt_dce.c \ > > + nir/nir_opt_gcm.c \ > > nir/nir_opt_global_to_local.c \ > > nir/nir_opt_peephole_select.c \ > > nir/nir_opt_remove_phis.c \ > > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > > index 90a7001..55fb43d 100644 > > --- a/src/glsl/nir/nir.h > > +++ b/src/glsl/nir/nir.h > > @@ -1589,6 +1589,8 @@ bool nir_opt_cse(nir_shader *shader); > > bool nir_opt_dce_impl(nir_function_impl *impl); > > bool nir_opt_dce(nir_shader *shader); > > > > +void nir_opt_gcm(nir_shader *shader); > > + > > bool nir_opt_peephole_select(nir_shader *shader); > > bool nir_opt_peephole_ffma(nir_shader *shader); > > > > diff --git a/src/glsl/nir/nir_opt_gcm.c b/src/glsl/nir/nir_opt_gcm.c > > new file mode 100644 > > index 000..d48518b > > --- /dev/null > > +++ b/src/glsl/nir/nir_opt_gcm.c > > @@ -0,0 +1,501 @@ > > +/* > > + * Copyright © 2014 Intel Corporation > > + * > > + * Permission is hereby granted, free of charge, to any person > obtaining a > > + * copy of this software and associated documentation files (the > "Software"), > > + * to deal in the Software without restriction, including without > limitation > > + * the rights to use, copy, modify, merge, publish, distribute, > sublicense, > > + * and/or sell copies of the Software, and to permit persons to whom the > > + * Software is furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice (including the > next > > + * paragraph) shall be included in all copies or substantial portions > of the > > + * Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT > SHALL > > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR > OTHER > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
Re: [Mesa-dev] [PATCH v2 11/12] nir: Add a global code motion (GCM) pass
On Mon, Feb 9, 2015 at 6:11 PM, Jason Ekstrand wrote: > > > On Mon, Feb 9, 2015 at 12:01 PM, Connor Abbott wrote: >> >> So last time you posted this, I had some suggestions but I realized >> that in light of the fact that we want to do value numbering, it >> didn't seem like my suggestions were any good. But now that I've >> thought about it a little bit, it seems to me like a better plan might >> be to pull instructions out of their basic blocks and into the global >> list of instructions earlier. That is, instead of marking instructions >> as pinned in gcm_pin_instructions_block(), put the ones that aren't >> pinned in state->instrs and then instead of walking over all the basic >> blocks and bailing on pinned instructions when scheduling early and >> late, just walk over state->instrs. This should be faster, and when we >> do value-numbering there's a natural place to do it, i.e. after >> pulling instructions but before scheduling -- this means we only >> value-number the non-pinned instructions, which is what we want since >> we're not coalescing the pinned instructions in this pass. This is >> also more similar to how it's presented in the paper (although our >> scheduling algorithm will still be different I guess). > > > Sorry for not addressing this comment. A couple of things: > > 1) We need the "is this instruction pinned" metadata for other things than > just not iterating over them so we need to stash it somehow anyway. Sure. > > 2) I think we want value numbering to be its own pass. We just won't run > the validator in between that and GCM. These passes are tricky and complex > enough that I don't want to make it even more confusing by saying "Oh, now > we value-number". Obviously, GVN and GCM will have to be kept in sync so we > don't combine two things that can't be code-motioned but that's not a big > deal. The thing is, GVN and GCM are very closely related -- not only can you not do GVN without GCM afterwards, but as you mentioned you need to know which instructions are pinned in order to not combine things that can't be combined. So I think calling GVN as a helper/subroutine as a part of GCM ("hey, take this list of instructions and merge any duplicates") might actually be better than keeping them as two kind-of-separate-but-not-really passes that have to be run in a specific order and share information -- they're actually *more* self-contained with the former approach than the latter, since all GVN needs to do is merge the instructions in the list it's given and we don't need to run the passes in a specific order without validating them in-between. > > As far as pulling them as we pin and then only ever walking over non-pinned > instructions. I thought about it.. Then I didn't for some historical > reason but I don't remember what that was anymore. We could go back and do > that and it would be a bit fasater. I can do that if you'd like but I don't > think it'll matter much. True, it'll probably only be worth it if we do what I mentioned above. > >> >> >> On Mon, Feb 9, 2015 at 3:19 AM, Jason Ekstrand >> wrote: >> > v2 Jason Ekstrand : >> > - Use nir_dominance_lca for computing least common anscestors >> > - Use the block index for comparing dominance tree depths >> > - Pin things that do partial derivatives >> > --- >> > src/glsl/Makefile.sources | 1 + >> > src/glsl/nir/nir.h | 2 + >> > src/glsl/nir/nir_opt_gcm.c | 501 >> > + >> > 3 files changed, 504 insertions(+) >> > create mode 100644 src/glsl/nir/nir_opt_gcm.c >> > >> > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources >> > index a580b6e..69cb2e6 100644 >> > --- a/src/glsl/Makefile.sources >> > +++ b/src/glsl/Makefile.sources >> > @@ -43,6 +43,7 @@ NIR_FILES = \ >> > nir/nir_opt_copy_propagate.c \ >> > nir/nir_opt_cse.c \ >> > nir/nir_opt_dce.c \ >> > + nir/nir_opt_gcm.c \ >> > nir/nir_opt_global_to_local.c \ >> > nir/nir_opt_peephole_select.c \ >> > nir/nir_opt_remove_phis.c \ >> > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h >> > index 90a7001..55fb43d 100644 >> > --- a/src/glsl/nir/nir.h >> > +++ b/src/glsl/nir/nir.h >> > @@ -1589,6 +1589,8 @@ bool nir_opt_cse(nir_shader *shader); >> > bool nir_opt_dce_impl(nir_function_impl *impl); >> > bool nir_opt_dce(nir_shader *shader); >> > >> > +void nir_opt_gcm(nir_shader *shader); >> > + >> > bool nir_opt_peephole_select(nir_shader *shader); >> > bool nir_opt_peephole_ffma(nir_shader *shader); >> > >> > diff --git a/src/glsl/nir/nir_opt_gcm.c b/src/glsl/nir/nir_opt_gcm.c >> > new file mode 100644 >> > index 000..d48518b >> > --- /dev/null >> > +++ b/src/glsl/nir/nir_opt_gcm.c >> > @@ -0,0 +1,501 @@ >> > +/* >> > + * Copyright © 2014 Intel Corporation >> > + * >> > + * Permission is hereby granted, free of charge, to any person >> > obtaining a >> > + * copy of this software and associated documentation files (the >> > "So
[Mesa-dev] [Bug 89043] undefined symbol: _glapi_tls_Dispatch
https://bugs.freedesktop.org/show_bug.cgi?id=89043 --- Comment #3 from Brian Paul --- Try reconfiguring with --enable-glx-tls and rebuilding. -- You are receiving this mail because: You are the QA Contact for the bug. You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 02/10] nir: Properly clean up CF nodes when we remove them
For some reason, the v2 version of this disappeared from my inbox, so I'll say Reviewed-by: Connor Abbott I'm not convinced this is enough for when we optimize constant if-statement conditions, but I'll leave that be for now. On Thu, Feb 5, 2015 at 5:28 PM, Jason Ekstrand wrote: > Previously, if you remved a CF node that still had instructions in it, none > of the use/def information from those instructions would get cleaned up. > Also, we weren't removing if statements from the if_uses of the > corresponding register or SSA def. This commit fixes both of these > problems > --- > src/glsl/nir/nir.c | 54 > ++ > 1 file changed, 54 insertions(+) > > diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c > index 9a88bd3..8ea7bb5 100644 > --- a/src/glsl/nir/nir.c > +++ b/src/glsl/nir/nir.c > @@ -1148,6 +1148,58 @@ stitch_blocks(nir_block *before, nir_block *after) > exec_node_remove(&after->cf_node.node); > } > > +static void > +remove_defs_uses(nir_instr *instr); > + > +static void > +cleanup_cf_node(nir_cf_node *node) > +{ > + switch (node->type) { > + case nir_cf_node_block: { > + nir_block *block = nir_cf_node_as_block(node); > + /* We need to walk the instructions and clean up defs/uses */ > + nir_foreach_instr(block, instr) > + remove_defs_uses(instr); > + break; > + } > + > + case nir_cf_node_if: { > + nir_if *if_stmt = nir_cf_node_as_if(node); > + foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list) > + cleanup_cf_node(child); > + foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list) > + cleanup_cf_node(child); > + > + struct set *if_uses; > + if (if_stmt->condition.is_ssa) { > + if_uses = if_stmt->condition.ssa->if_uses; > + } else { > + if_uses = if_stmt->condition.reg.reg->if_uses; > + } > + > + struct set_entry *entry = _mesa_set_search(if_uses, if_stmt); > + assert(entry); > + _mesa_set_remove(if_uses, entry); > + break; > + } > + > + case nir_cf_node_loop: { > + nir_loop *loop = nir_cf_node_as_loop(node); > + foreach_list_typed(nir_cf_node, child, node, &loop->body) > + cleanup_cf_node(child); > + break; > + } > + case nir_cf_node_function: { > + nir_function_impl *impl = nir_cf_node_as_function(node); > + foreach_list_typed(nir_cf_node, child, node, &impl->body) > + cleanup_cf_node(child); > + break; > + } > + default: > + unreachable("Invalid CF node type"); > + } > +} > + > void > nir_cf_node_remove(nir_cf_node *node) > { > @@ -1175,6 +1227,8 @@ nir_cf_node_remove(nir_cf_node *node) >exec_node_remove(&node->node); >stitch_blocks(before_block, after_block); > } > + > + cleanup_cf_node(node); > } > > static bool > -- > 2.2.2 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev