On Wed, Apr 8, 2015 at 6:34 PM, Rob Clark <robdcl...@gmail.com> wrote: > From: Rob Clark <robcl...@freedesktop.org> > > Since the rest of NIR really would rather have these as variables rather > than registers, create a nir_variable per array. But rather than > completely re-arrange ttn to be variable based rather than register > based, keep the registers. In the cases where there is a matching var > for the reg, ttn_emit_instruction will append the appropriate intrinsic > to get things back from the shadow reg into the variable.
Hmm, bleh, I probably should have updated the commit message before hitting send. This version drops the pre-declared registers for array elements and instead creates temp regs on demand, as Eric suggested (and as mentioned in the v3 note below). BR, -R > NOTE: this doesn't quite handle TEMP[ADDR[]] when the DCL doesn't give > an array id. But those just kinda suck, and should really go away. > AFAICT we don't get those from glsl. Might be an issue for some other > state tracker. > > v2: rework to use load_var/store_var with deref chains > v3: create new "burner" reg for temporarily holding the (potentially > writemask'd) dest after each instruction; add load_var to initialize > temporary dest in case not all components are overwritten > > Signed-off-by: Rob Clark <robcl...@freedesktop.org> > --- > src/gallium/auxiliary/nir/tgsi_to_nir.c | 159 > +++++++++++++++++++++++++++++--- > 1 file changed, 144 insertions(+), 15 deletions(-) > > diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c > b/src/gallium/auxiliary/nir/tgsi_to_nir.c > index fcccdad..c3332cc 100644 > --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c > +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c > @@ -44,6 +44,7 @@ > struct ttn_reg_info { > /** nir register containing this TGSI index. */ > nir_register *reg; > + nir_variable *var; > /** Offset (in vec4s) from the start of var for this TGSI index. 
*/ > int offset; > }; > @@ -120,21 +121,32 @@ ttn_emit_declaration(struct ttn_compile *c) > unsigned i; > > if (file == TGSI_FILE_TEMPORARY) { > - nir_register *reg; > - if (c->scan->indirect_files & (1 << file)) { > - reg = nir_local_reg_create(b->impl); > - reg->num_components = 4; > - reg->num_array_elems = array_size; > + if (decl->Declaration.Array) { > + /* for arrays, we create variables instead of registers: */ > + nir_variable *var = rzalloc(b->shader, nir_variable); > + > + var->type = glsl_array_type(glsl_vec4_type(), array_size); > + var->data.mode = nir_var_global; > + var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID); > + > + exec_list_push_tail(&b->shader->globals, &var->node); > > for (i = 0; i < array_size; i++) { > - c->temp_regs[decl->Range.First + i].reg = reg; > + /* point all the matching slots to the same var, > + * with appropriate offset set, mostly just so > + * we know what to do when tgsi does a non-indirect > + * access > + */ > + c->temp_regs[decl->Range.First + i].reg = NULL; > + c->temp_regs[decl->Range.First + i].var = var; > c->temp_regs[decl->Range.First + i].offset = i; > } > } else { > for (i = 0; i < array_size; i++) { > - reg = nir_local_reg_create(b->impl); > + nir_register *reg = nir_local_reg_create(b->impl); > reg->num_components = 4; > c->temp_regs[decl->Range.First + i].reg = reg; > + c->temp_regs[decl->Range.First + i].var = NULL; > c->temp_regs[decl->Range.First + i].offset = 0; > } > } > @@ -245,6 +257,32 @@ ttn_emit_immediate(struct ttn_compile *c) > static nir_src * > ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register > *indirect); > > +/* generate either a constant or indirect deref chain for accessing an > + * array variable. 
> + */ > +static nir_deref_var * > +ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset, > + struct tgsi_ind_register *indirect) > +{ > + nir_builder *b = &c->build; > + nir_deref_var *deref = nir_deref_var_create(b->shader, var); > + nir_deref_array *arr = nir_deref_array_create(b->shader); > + > + arr->base_offset = offset; > + arr->deref.type = glsl_get_array_element(var->type); > + > + if (indirect) { > + arr->deref_array_type = nir_deref_array_type_indirect; > + arr->indirect = nir_src_for_reg(c->addr_reg); > + } else { > + arr->deref_array_type = nir_deref_array_type_direct; > + } > + > + deref->deref.child = &arr->deref; > + > + return deref; > +} > + > static nir_src > ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned > index, > struct tgsi_ind_register *indirect) > @@ -256,10 +294,25 @@ ttn_src_for_file_and_index(struct ttn_compile *c, > unsigned file, unsigned index, > > switch (file) { > case TGSI_FILE_TEMPORARY: > - src.reg.reg = c->temp_regs[index].reg; > - src.reg.base_offset = c->temp_regs[index].offset; > - if (indirect) > - src.reg.indirect = ttn_src_for_indirect(c, indirect); > + if (c->temp_regs[index].var) { > + unsigned offset = c->temp_regs[index].offset; > + nir_variable *var = c->temp_regs[index].var; > + nir_intrinsic_instr *load; > + > + load = nir_intrinsic_instr_create(b->shader, > + nir_intrinsic_load_var); > + load->num_components = 4; > + load->variables[0] = ttn_array_deref(c, var, offset, indirect); > + > + nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); > + nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); > + > + src = nir_src_for_ssa(&load->dest.ssa); > + > + } else { > + assert(!indirect); > + src.reg.reg = c->temp_regs[index].reg; > + } > break; > > case TGSI_FILE_ADDRESS: > @@ -345,8 +398,49 @@ ttn_get_dest(struct ttn_compile *c, struct > tgsi_full_dst_register *tgsi_fdst) > memset(&dest, 0, sizeof(dest)); > > if (tgsi_dst->File == TGSI_FILE_TEMPORARY) { > - 
dest.dest.reg.reg = c->temp_regs[index].reg; > - dest.dest.reg.base_offset = c->temp_regs[index].offset; > + if (c->temp_regs[index].var) { > + nir_builder *b = &c->build; > + nir_intrinsic_instr *load; > + struct tgsi_ind_register *indirect = > + tgsi_dst->Indirect ? &tgsi_fdst->Indirect : NULL; > + nir_register *reg; > + > + /* this works, because TGSI will give us a base offset > + * (in case of indirect index) that points back into > + * the array. Access can be direct or indirect, we > + * don't really care. Just create a one-shot dst reg > + * that will get store_var'd back into the array var > + * at the end of ttn_emit_instruction() > + */ > + reg = nir_local_reg_create(c->build.impl); > + reg->num_components = 4; > + dest.dest.reg.reg = reg; > + dest.dest.reg.base_offset = 0; > + > + /* since the alu op might not write to all components > + * of the temporary, we must first do a load_var to > + * get the previous array elements into the register. > + * This is one area that NIR could use a bit of > + * improvement (or opt pass to clean up the mess > + * once things are scalarized) > + */ > + > + load = nir_intrinsic_instr_create(c->build.shader, > + nir_intrinsic_load_var); > + load->num_components = 4; > + load->variables[0] = > + ttn_array_deref(c, c->temp_regs[index].var, > + c->temp_regs[index].offset, > + indirect); > + > + load->dest = nir_dest_for_reg(reg); > + > + nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); > + } else { > + assert(!tgsi_dst->Indirect); > + dest.dest.reg.reg = c->temp_regs[index].reg; > + dest.dest.reg.base_offset = c->temp_regs[index].offset; > + } > } else if (tgsi_dst->File == TGSI_FILE_OUTPUT) { > dest.dest.reg.reg = c->output_regs[index].reg; > dest.dest.reg.base_offset = c->output_regs[index].offset; > @@ -358,12 +452,27 @@ ttn_get_dest(struct ttn_compile *c, struct > tgsi_full_dst_register *tgsi_fdst) > dest.write_mask = tgsi_dst->WriteMask; > dest.saturate = false; > > - if (tgsi_dst->Indirect) > + if 
(tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) > dest.dest.reg.indirect = ttn_src_for_indirect(c, &tgsi_fdst->Indirect); > > return dest; > } > > +static nir_variable * > +ttn_get_var(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst) > +{ > + struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register; > + unsigned index = tgsi_dst->Index; > + > + if (tgsi_dst->File == TGSI_FILE_TEMPORARY) { > + return c->temp_regs[index].var; > + } else if (tgsi_dst->File == TGSI_FILE_OUTPUT) { > + return c->output_regs[index].var; > + } > + > + return NULL; > +} > + > static nir_ssa_def * > ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc) > { > @@ -1134,6 +1243,7 @@ ttn_emit_instruction(struct ttn_compile *c) > struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction; > unsigned i; > unsigned tgsi_op = tgsi_inst->Instruction.Opcode; > + struct tgsi_full_dst_register *tgsi_dst = &tgsi_inst->Dst[0]; > > if (tgsi_op == TGSI_OPCODE_END) > return; > @@ -1142,7 +1252,7 @@ ttn_emit_instruction(struct ttn_compile *c) > for (i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++) { > src[i] = ttn_get_src(c, &tgsi_inst->Src[i]); > } > - nir_alu_dest dest = ttn_get_dest(c, &tgsi_inst->Dst[0]); > + nir_alu_dest dest = ttn_get_dest(c, tgsi_dst); > > switch (tgsi_op) { > case TGSI_OPCODE_RSQ: > @@ -1332,6 +1442,25 @@ ttn_emit_instruction(struct ttn_compile *c) > assert(!dest.dest.is_ssa); > ttn_move_dest(b, dest, nir_fsat(b, ttn_src_for_dest(b, &dest))); > } > + > + /* if the dst has a matching var, append store_global to move > + * output from reg to var > + */ > + nir_variable *var = ttn_get_var(c, tgsi_dst); > + if (var) { > + unsigned index = tgsi_dst->Register.Index; > + unsigned offset = c->temp_regs[index].offset; > + nir_intrinsic_instr *store = > + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); > + struct tgsi_ind_register *indirect = tgsi_dst->Register.Indirect ? 
> + &tgsi_dst->Indirect : NULL; > + > + store->num_components = 4; > + store->variables[0] = ttn_array_deref(c, var, offset, indirect); > + store->src[0] = nir_src_for_reg(dest.dest.reg.reg); > + > + nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr); > + } > } > > /** > -- > 2.1.0 > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev