On Wed, Apr 8, 2015 at 11:14 AM, Eric Anholt <e...@anholt.net> wrote: > Rob Clark <robdcl...@gmail.com> writes: > >> From: Rob Clark <robcl...@freedesktop.org> >> >> Since the rest of NIR really would rather have these as variables rather >> than registers, create a nir_variable per array. But rather than >> completely re-arrange ttn to be variable based rather than register >> based, keep the registers. In the cases where there is a matching var >> for the reg, ttn_emit_instruction will append the appropriate intrinsic >> to get things back from the shadow reg into the variable. >> >> NOTE: this doesn't quite handle TEMP[ADDR[]] when the DCL doesn't give >> an array id. But those just kinda suck, and should really go away. >> AFAICT we don't get those from glsl. Might be an issue for some other >> state tracker. >> >> v2: rework to use load_var/store_var with deref chains >> >> Signed-off-by: Rob Clark <robcl...@freedesktop.org> >> --- >> src/gallium/auxiliary/nir/tgsi_to_nir.c | 122 >> +++++++++++++++++++++++++++----- >> 1 file changed, 103 insertions(+), 19 deletions(-) >> >> diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c >> b/src/gallium/auxiliary/nir/tgsi_to_nir.c >> index da935a4..f4c0bad 100644 >> --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c >> +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c >> @@ -44,6 +44,7 @@ >> struct ttn_reg_info { >> /** nir register containing this TGSI index. */ >> nir_register *reg; >> + nir_variable *var; >> /** Offset (in vec4s) from the start of var for this TGSI index. */ >> int offset; >> }; >> @@ -121,22 +122,29 @@ ttn_emit_declaration(struct ttn_compile *c) >> >> if (file == TGSI_FILE_TEMPORARY) { >> nir_register *reg; >> - if (c->scan->indirect_files & (1 << file)) { >> + nir_variable *var = NULL; >> + >> + if (decl->Declaration.Array) { >> + /* for arrays, the register created just serves as a >> + * shadow register. We append intrinsic_store_global >> + * after the tgsi instruction is translated to move >> + * back from the shadow register to the variable >> + */ >> + var = rzalloc(b->shader, nir_variable); >> + >> + var->type = glsl_array_type(glsl_vec4_type(), array_size); >> + var->data.mode = nir_var_global; >> + var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID); >> + >> + exec_list_push_tail(&b->shader->globals, &var->node); >> + } >> + >> + for (i = 0; i < array_size; i++) { >> reg = nir_local_reg_create(b->impl); >> reg->num_components = 4; >> - reg->num_array_elems = array_size; >> - >> - for (i = 0; i < array_size; i++) { >> - c->temp_regs[decl->Range.First + i].reg = reg; >> - c->temp_regs[decl->Range.First + i].offset = i; >> - } >> - } else { >> - for (i = 0; i < array_size; i++) { >> - reg = nir_local_reg_create(b->impl); >> - reg->num_components = 4; >> - c->temp_regs[decl->Range.First + i].reg = reg; >> - c->temp_regs[decl->Range.First + i].offset = 0; >> - } >> + c->temp_regs[decl->Range.First + i].reg = reg; >> + c->temp_regs[decl->Range.First + i].var = var; >> + c->temp_regs[decl->Range.First + i].offset = i; > > Continuing to use array_size here doesn't make any sense to me, since if > you're not handling variable array indices when generating stores into > the array. So all you want is a single vec4 reg available so that you > have something that our ALU op generation can do writemasked stores > into, and you're picking an arbitrary one of them in ttn_get_dest(). > > I think this would make a ton more sense if ttn_get_dest() just returned > a new vec4 local reg for the temporary, instead of having this > sort-of-shadow thing. >
so the shadow registers did make things like: DCL TEMP[0..2], ARRAY(1), LOCAL .... 1: MOV TEMP[1].x, IN[1].wwww 2: MOV TEMP[1].yz, IN[2].yxyy much easier to deal with.. I'm still thinking about how to handle that w/ the create-new-temp-register-each-time approach.. but yeah, doesn't work as well if you have indirect dst. BR, -R >> } >> } else if (file == TGSI_FILE_ADDRESS) { >> c->addr_reg = nir_local_reg_create(b->impl); >> @@ -245,6 +253,32 @@ ttn_emit_immediate(struct ttn_compile *c) >> static nir_src * >> ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register >> *indirect); >> >> +/* generate either a constant or indirect deref chain for accessing an >> + * array variable. >> + */ >> +static nir_deref_var * >> +ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset, >> + struct tgsi_ind_register *indirect) >> +{ >> + nir_builder *b = &c->build; >> + nir_deref_var *deref = nir_deref_var_create(b->shader, var); >> + nir_deref_array *arr = nir_deref_array_create(b->shader); >> + >> + arr->base_offset = offset; >> + arr->deref.type = glsl_get_array_element(var->type); >> + >> + if (indirect) { >> + arr->deref_array_type = nir_deref_array_type_indirect; >> + arr->indirect = nir_src_for_reg(c->addr_reg); >> + } else { >> + arr->deref_array_type = nir_deref_array_type_direct; >> + } >> + >> + deref->deref.child = &arr->deref; >> + >> + return deref; >> +} >> + >> static nir_src >> ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned >> index, >> struct tgsi_ind_register *indirect) >> @@ -256,10 +290,25 @@ ttn_src_for_file_and_index(struct ttn_compile *c, >> unsigned file, unsigned index, >> >> switch (file) { >> case TGSI_FILE_TEMPORARY: >> - src.reg.reg = c->temp_regs[index].reg; >> - src.reg.base_offset = c->temp_regs[index].offset; >> - if (indirect) >> - src.reg.indirect = ttn_src_for_indirect(c, indirect); >> + if (c->temp_regs[index].var) { >> + unsigned offset = c->temp_regs[index].offset; >> + nir_variable *var = c->temp_regs[index].var; >> + nir_intrinsic_instr *load; >> + >> + load = nir_intrinsic_instr_create(b->shader, >> + nir_intrinsic_load_var); >> + load->num_components = 4; >> + load->variables[0] = ttn_array_deref(c, var, offset, indirect); >> + >> + nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); >> + nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr); >> + >> + src = nir_src_for_ssa(&load->dest.ssa); >> + >> + } else { >> + assert(!indirect); >> + src.reg.reg = c->temp_regs[index].reg; >> + } >> break; >> >> case TGSI_FILE_ADDRESS: >> @@ -363,6 +412,21 @@ ttn_get_dest(struct ttn_compile *c, struct >> tgsi_full_dst_register *tgsi_fdst) >> return dest; >> } > > I don't see where you deleted the "if (tgsi_dst->Indirect)" line in > ttn_get_dest, so I'm confused how this works at all since your variable > stores don't handle the indirect. > >> >> +static nir_variable * >> +ttn_get_var(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst) >> +{ >> + struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register; >> + unsigned index = tgsi_dst->Index; >> + >> + if (tgsi_dst->File == TGSI_FILE_TEMPORARY) { >> + return c->temp_regs[index].var; >> + } else if (tgsi_dst->File == TGSI_FILE_OUTPUT) { >> + return c->output_regs[index].var; >> + } >> + >> + return NULL; >> +} >> + >> static nir_ssa_def * >> ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc) >> { >> @@ -1133,6 +1197,7 @@ ttn_emit_instruction(struct ttn_compile *c) >> struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction; >> unsigned i; >> unsigned tgsi_op = tgsi_inst->Instruction.Opcode; >> + struct tgsi_full_dst_register *tgsi_dst = &tgsi_inst->Dst[0]; >> >> if (tgsi_op == TGSI_OPCODE_END) >> return; >> @@ -1141,7 +1206,7 @@ ttn_emit_instruction(struct ttn_compile *c) >> for (i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++) { >> src[i] = ttn_get_src(c, &tgsi_inst->Src[i]); >> } >> - nir_alu_dest dest = ttn_get_dest(c, &tgsi_inst->Dst[0]); >> + nir_alu_dest dest = ttn_get_dest(c, tgsi_dst); >> >> switch (tgsi_op) { >> case TGSI_OPCODE_RSQ: >> @@ -1331,6 +1396,25 @@ ttn_emit_instruction(struct ttn_compile *c) >> assert(!dest.dest.is_ssa); >> ttn_move_dest(b, dest, nir_fsat(b, ttn_src_for_dest(b, &dest))); >> } >> + >> + /* if the dst has a matching var, append store_global to move >> + * output from reg to var >> + */ >> + nir_variable *var = ttn_get_var(c, tgsi_dst); >> + if (var) { >> + unsigned index = tgsi_dst->Register.Index; >> + unsigned offset = c->temp_regs[index].offset; >> + nir_intrinsic_instr *store = >> + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); >> + struct tgsi_ind_register *indirect = tgsi_dst->Register.Indirect ? >> + &tgsi_dst->Indirect : NULL; >> + >> + store->num_components = 4; >> + store->variables[0] = ttn_array_deref(c, var, offset, indirect); >> + store->src[0] = nir_src_for_reg(dest.dest.reg.reg); >> + >> + nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr); >> + } >> } > > What about a destination with a writemask? You'd need to load from the > var and merge in before storing. _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev