On Mon, Mar 7, 2016 at 12:46 AM, Samuel Iglesias Gonsálvez < sigles...@igalia.com> wrote:
> From: Connor Abbott <connor.w.abb...@intel.com> > > When we replace an expresion we have to compute bitsize information for the > replacement. We do this in two passes to validate that bitsize information > is consistent and correct: first we propagate bitsize from child nodes to > parent, then we do it the other way around, starting from the original's > instruction destination bitsize. > > v2 (Iago): > - Always use nir_type_bool32 instead of nir_type_bool when generating > algebraic optimizations. Before we used nir_type_bool32 with constants > and nir_type_bool with variables. > - Fix bool comparisons in nir_search.c to account for bitsized types. > > v3 (Sam): > - Unpack the double constant value as unsigned long long (8 bytes) in > nir_algrebraic.py. > > Signed-off-by: Iago Toral Quiroga <ito...@igalia.com> > Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> > --- > src/compiler/nir/nir_algebraic.py | 22 +++- > src/compiler/nir/nir_search.c | 244 > ++++++++++++++++++++++++++++++++++---- > src/compiler/nir/nir_search.h | 8 +- > 3 files changed, 247 insertions(+), 27 deletions(-) > > diff --git a/src/compiler/nir/nir_algebraic.py > b/src/compiler/nir/nir_algebraic.py > index 2357b57..1818877 100644 > --- a/src/compiler/nir/nir_algebraic.py > +++ b/src/compiler/nir/nir_algebraic.py > @@ -63,11 +63,11 @@ class Value(object): > static const ${val.c_type} ${val.name} = { > { ${val.type_enum} }, > % if isinstance(val, Constant): > - { ${hex(val)} /* ${val.value} */ }, > + ${val.type()}, { ${hex(val)} /* ${val.value} */ }, > % elif isinstance(val, Variable): > ${val.index}, /* ${val.var_name} */ > ${'true' if val.is_constant else 'false'}, > - nir_type_${ val.required_type or 'invalid' }, > + ${val.type() or 'nir_type_invalid' }, > % elif isinstance(val, Expression): > nir_op_${val.opcode}, > { ${', '.join(src.c_ptr for src in val.sources)} }, > @@ -107,10 +107,18 @@ class Constant(Value): > if isinstance(self.value, (int, long)): > return hex(self.value) > elif isinstance(self.value, float): > - return hex(struct.unpack('I', struct.pack('f', self.value))[0]) > + return hex(struct.unpack('Q', struct.pack('d', self.value))[0]) > else: > assert False > > + def type(self): > + if isinstance(self.value, (bool)): > + return "nir_type_bool32" > + elif isinstance(self.value, (int, long)): > + return "nir_type_int" > + elif isinstance(self.value, float): > + return "nir_type_float" > + > _var_name_re = > re.compile(r"(?P<const>#)?(?P<name>\w+)(?:@(?P<type>\w+))?") > > class Variable(Value): > @@ -129,6 +137,14 @@ class Variable(Value): > > self.index = varset[self.var_name] > > + def type(self): > + if self.required_type == 'bool': > + return "nir_type_bool32" > + elif self.required_type in ('int', 'unsigned'): > + return "nir_type_int" > + elif self.required_type == 'float': > + return "nir_type_float" > + > class Expression(Value): > def __init__(self, expr, name_base, varset): > Value.__init__(self, name_base, "expression") > diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c > index f509ce6..e874c79 100644 > --- a/src/compiler/nir/nir_search.c > +++ b/src/compiler/nir/nir_search.c > @@ -62,7 +62,8 @@ alu_instr_is_bool(nir_alu_instr *instr) > case nir_op_inot: > return src_is_bool(instr->src[0].src); > default: > - return nir_op_infos[instr->op].output_type == nir_type_bool; > + return (nir_op_infos[instr->op].output_type & > NIR_ALU_TYPE_BASE_TYPE_MASK) > + == nir_type_bool; > } > } > > @@ -125,8 +126,10 @@ match_value(const nir_search_value *value, > nir_alu_instr *instr, unsigned src, > nir_alu_instr *src_alu = > nir_instr_as_alu(instr->src[src].src.ssa->parent_instr); > > - if (nir_op_infos[src_alu->op].output_type != var->type && > - !(var->type == nir_type_bool && > alu_instr_is_bool(src_alu))) > + if ((nir_op_infos[src_alu->op].output_type & > + NIR_ALU_TYPE_BASE_TYPE_MASK) != var->type && > + !((var->type & NIR_ALU_TYPE_BASE_TYPE_MASK) == > nir_type_bool && > + alu_instr_is_bool(src_alu))) > return false; > } > > @@ -158,21 +161,65 @@ match_value(const nir_search_value *value, > nir_alu_instr *instr, unsigned src, > nir_load_const_instr *load = > nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr); > > - switch (nir_op_infos[instr->op].input_types[src]) { > + switch (const_val->type) { > case nir_type_float: > for (unsigned i = 0; i < num_components; ++i) { > - if (load->value.f[new_swizzle[i]] != const_val->data.f) > + double val; > + switch (load->def.bit_size) { > + case 32: > + val = load->value.f[new_swizzle[i]]; > + break; > + case 64: > + val = load->value.d[new_swizzle[i]]; > + break; > + default: > + unreachable("unknown bit size"); > + } > + > + if (val != const_val->data.d) > return false; > } > return true; > + > case nir_type_int: > + for (unsigned i = 0; i < num_components; ++i) { > + int64_t val; > + switch (load->def.bit_size) { > + case 32: > + val = load->value.i[new_swizzle[i]]; > + break; > + case 64: > + val = load->value.l[new_swizzle[i]]; > + break; > + default: > + unreachable("unknown bit size"); > + } > + > + if (val != const_val->data.i) > + return false; > + } > + return true; > + > case nir_type_uint: > - case nir_type_bool: > + case nir_type_bool32: > for (unsigned i = 0; i < num_components; ++i) { > - if (load->value.i[new_swizzle[i]] != const_val->data.i) > + uint64_t val; > + switch (load->def.bit_size) { > + case 32: > + val = load->value.u[new_swizzle[i]]; > + break; > + case 64: > + val = load->value.ul[new_swizzle[i]]; > + break; > + default: > + unreachable("unknown bit size"); > + } > + > + if (val != const_val->data.u) > return false; > 64-bit. Also, we may want bool to be its own case. I think keeping it with uint is probably fine. > } > return true; > + > default: > unreachable("Invalid alu source type"); > } > @@ -244,9 +291,123 @@ match_expression(const nir_search_expression *expr, > nir_alu_instr *instr, > } > } > > +typedef struct bitsize_tree { > + unsigned num_srcs; > + struct bitsize_tree *srcs[4]; > + > + unsigned common_size; > + bool is_src_sized[4]; > + bool is_dest_sized; > + > + unsigned dest_size; > + unsigned src_size[4]; > +} bitsize_tree; > + > +static bitsize_tree * > +build_bitsize_tree(void *mem_ctx, struct match_state *state, > + const nir_search_value *value) > +{ > + bitsize_tree *tree = ralloc(mem_ctx, bitsize_tree); > + > + switch (value->type) { > + case nir_search_value_expression: { > + nir_search_expression *expr = nir_search_value_as_expression(value); > + nir_op_info info = nir_op_infos[expr->opcode]; > + tree->num_srcs = info.num_inputs; > + tree->common_size = 0; > + for (unsigned i = 0; i < info.num_inputs; i++) { > + tree->is_src_sized[i] = !!(info.input_types[i] & > NIR_ALU_TYPE_SIZE_MASK); > + if (tree->is_src_sized[i]) > + tree->src_size[i] = info.input_types[i] & > NIR_ALU_TYPE_SIZE_MASK; > + tree->srcs[i] = build_bitsize_tree(mem_ctx, state, > expr->srcs[i]); > + } > + tree->is_dest_sized = !!(info.output_type & NIR_ALU_TYPE_SIZE_MASK); > + if (tree->is_dest_sized) > + tree->dest_size = info.output_type & NIR_ALU_TYPE_SIZE_MASK; > + break; > + } > + > + case nir_search_value_variable: { > + nir_search_variable *var = nir_search_value_as_variable(value); > + tree->num_srcs = 0; > + tree->is_dest_sized = true; > + tree->dest_size = > nir_src_bit_size(state->variables[var->variable].src); > + break; > + } > + > + case nir_search_value_constant: { > + tree->num_srcs = 0; > + tree->is_dest_sized = false; > + tree->common_size = 0; > + break; > + } > + } > + > + return tree; > +} > + > +static unsigned > +bitsize_tree_filter_up(bitsize_tree *tree) > +{ > + for (unsigned i = 0; i < tree->num_srcs; i++) { > + unsigned src_size = bitsize_tree_filter_up(tree->srcs[i]); > + if (src_size == 0) > + continue; > + > + if (tree->is_src_sized[i]) { > + assert(src_size == tree->src_size[i]); > + } else if (tree->common_size != 0) { > + assert(src_size == tree->common_size); > + tree->src_size[i] = src_size; > + } else { > + tree->common_size = src_size; > + tree->src_size[i] = src_size; > + } > + } > + > + if (tree->num_srcs && tree->common_size) { > + if (tree->dest_size == 0) > + tree->dest_size = tree->common_size; > + else if (!tree->is_dest_sized) > + assert(tree->dest_size == tree->common_size); > + > + for (unsigned i = 0; i < tree->num_srcs; i++) { > + if (!tree->src_size[i]) > + tree->src_size[i] = tree->common_size; > + } > + } > + > + return tree->dest_size; > +} > + > +static void > +bitsize_tree_filter_down(bitsize_tree *tree, unsigned size) > +{ > + if (tree->dest_size) > + assert(tree->dest_size == size); > + else > + tree->dest_size = size; > + > + if (!tree->is_dest_sized) { > + if (tree->common_size) > + assert(tree->common_size == size); > + else > + tree->common_size = size; > + } > + > + for (unsigned i = 0; i < tree->num_srcs; i++) { > + if (!tree->src_size[i]) { > + assert(tree->common_size); > + tree->src_size[i] = tree->common_size; > + } > + bitsize_tree_filter_down(tree->srcs[i], tree->src_size[i]); > + } > +} > Oh, man... This is so ugly... Unfortunately, I know of no better way to do it so we'll go with it. > + > static nir_alu_src > -construct_value(const nir_search_value *value, nir_alu_type type, > - unsigned num_components, struct match_state *state, > +construct_value(const nir_search_value *value, > + unsigned num_components, bitsize_tree *bitsize, > + struct match_state *state, > nir_instr *instr, void *mem_ctx) > { > switch (value->type) { > @@ -257,7 +418,8 @@ construct_value(const nir_search_value *value, > nir_alu_type type, > num_components = nir_op_infos[expr->opcode].output_size; > > nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode); > - nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, 32, > NULL); > + nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, > + bitsize->dest_size, NULL); > alu->dest.write_mask = (1 << num_components) - 1; > alu->dest.saturate = false; > > @@ -269,8 +431,7 @@ construct_value(const nir_search_value *value, > nir_alu_type type, > num_components = nir_op_infos[alu->op].input_sizes[i]; > > alu->src[i] = construct_value(expr->srcs[i], > - > nir_op_infos[alu->op].input_types[i], > - num_components, > + num_components, bitsize->srcs[i], > state, instr, mem_ctx); > } > > @@ -301,23 +462,57 @@ construct_value(const nir_search_value *value, > nir_alu_type type, > const nir_search_constant *c = nir_search_value_as_constant(value); > nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, > 1); > > - switch (type) { > + switch (c->type) { > case nir_type_float: > - load->def.name = ralloc_asprintf(mem_ctx, "%f", c->data.f); > - load->value.f[0] = c->data.f; > + load->def.name = ralloc_asprintf(mem_ctx, "%f", c->data.d); > + switch (bitsize->dest_size) { > + case 32: > + load->value.f[0] = c->data.d; > + break; > + case 64: > + load->value.d[0] = c->data.d; > + break; > + default: > + unreachable("unknown bit size"); > + } > break; > + > case nir_type_int: > - load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i); > - load->value.i[0] = c->data.i; > + load->def.name = ralloc_asprintf(mem_ctx, "%ld", c->data.i); > + switch (bitsize->dest_size) { > + case 32: > + load->value.i[0] = c->data.i; > + break; > + case 64: > + load->value.l[0] = c->data.i; > + break; > + default: > + unreachable("unknown bit size"); > + } > break; > + > case nir_type_uint: > - case nir_type_bool: > + load->def.name = ralloc_asprintf(mem_ctx, "%lu", c->data.u); > + switch (bitsize->dest_size) { > + case 32: > + load->value.u[0] = c->data.u; > + break; > + case 64: > + load->value.ul[0] = c->data.u; > + break; > + default: > + unreachable("unknown bit size"); > + } > + > + case nir_type_bool32: > load->value.u[0] = c->data.u; > break; > default: > unreachable("Invalid alu source type"); > } > > + load->def.bit_size = bitsize->dest_size; > + > nir_instr_insert_before(instr, &load->instr); > > nir_alu_src val; > @@ -352,6 +547,11 @@ nir_replace_instr(nir_alu_instr *instr, const > nir_search_expression *search, > swizzle, &state)) > return NULL; > > + void *bitsize_ctx = ralloc_context(NULL); > + bitsize_tree *tree = build_bitsize_tree(bitsize_ctx, &state, replace); > + bitsize_tree_filter_up(tree); > + bitsize_tree_filter_down(tree, instr->dest.dest.ssa.bit_size); > + > /* Inserting a mov may be unnecessary. However, it's much easier to > * simply let copy propagation clean this up than to try to go through > * and rewrite swizzles ourselves. > @@ -362,9 +562,9 @@ nir_replace_instr(nir_alu_instr *instr, const > nir_search_expression *search, > instr->dest.dest.ssa.num_components, > instr->dest.dest.ssa.bit_size, NULL); > > - mov->src[0] = construct_value(replace, > nir_op_infos[instr->op].output_type, > - instr->dest.dest.ssa.num_components, > &state, > - &instr->instr, mem_ctx); > + mov->src[0] = construct_value(replace, > + instr->dest.dest.ssa.num_components, > + tree, &state, &instr->instr, mem_ctx); > nir_instr_insert_before(&instr->instr, &mov->instr); > > nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, > @@ -376,5 +576,7 @@ nir_replace_instr(nir_alu_instr *instr, const > nir_search_expression *search, > */ > nir_instr_remove(&instr->instr); > > + ralloc_free(bitsize_ctx); > + > return mov; > } > diff --git a/src/compiler/nir/nir_search.h b/src/compiler/nir/nir_search.h > index 7d47792..321d6d0 100644 > --- a/src/compiler/nir/nir_search.h > +++ b/src/compiler/nir/nir_search.h > @@ -71,10 +71,12 @@ typedef struct { > typedef struct { > nir_search_value value; > > + nir_alu_type type; > + > union { > - uint32_t u; > - int32_t i; > - float f; > + uint64_t u; > + int64_t i; > + double d; > } data; > } nir_search_constant; > > -- > 2.7.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev