On Thu, Jun 25, 2015 at 12:29 PM, Connor Abbott <cwabbo...@gmail.com> wrote: > Before, we would use registers, but set a magical "parent_instr" field > to indicate that it was actually purely an SSA value (i.e., it wasn't > involved in any phi nodes). Instead, just use SSA values directly, which > lets us get rid of the hack and reduces memory usage since we're not > allocating a nir_register for every value. It also makes our handling of > load_const more consistent compared to the other instructions. > > Signed-off-by: Connor Abbott <cwabbo...@gmail.com> > --- > src/mesa/drivers/dri/i965/brw_fs.h | 5 ++ > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 73 > ++++++++++++++-------- > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 1 + > src/mesa/drivers/dri/i965/brw_nir.c | 2 +- > .../dri/i965/brw_nir_analyze_boolean_resolves.c | 12 ++-- > 5 files changed, 59 insertions(+), 34 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h > b/src/mesa/drivers/dri/i965/brw_fs.h > index 243baf6..a6fee35 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.h > +++ b/src/mesa/drivers/dri/i965/brw_fs.h > @@ -249,6 +249,10 @@ public: > void nir_emit_block(nir_block *block); > void nir_emit_instr(nir_instr *instr); > void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr); > + void nir_emit_load_const(const brw::fs_builder &bld, > + nir_load_const_instr *instr); > + void nir_emit_undef(const brw::fs_builder &bld, > + nir_ssa_undef_instr *instr); > void nir_emit_intrinsic(const brw::fs_builder &bld, > nir_intrinsic_instr *instr); > void nir_emit_texture(const brw::fs_builder &bld, > @@ -345,6 +349,7 @@ public: > unsigned max_grf; > > fs_reg *nir_locals; > + fs_reg *nir_ssa_values; > fs_reg *nir_globals; > fs_reg nir_inputs; > fs_reg nir_outputs; > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index 59081ea..58896d7 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -366,6 +366,9 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl) > nir_locals[reg->index] = bld.vgrf(BRW_REGISTER_TYPE_F, size); > } > > + nir_ssa_values = reralloc(mem_ctx, nir_ssa_values, fs_reg, > + impl->ssa_alloc); > + > nir_emit_cf_list(&impl->body); > } > > @@ -459,9 +462,11 @@ fs_visitor::nir_emit_instr(nir_instr *instr) > break; > > case nir_instr_type_load_const: > - /* We can hit these, but we do nothing now and use them as > - * immediates later. > - */ > + nir_emit_load_const(abld, nir_instr_as_load_const(instr)); > + break; > + > + case nir_instr_type_ssa_undef: > + nir_emit_undef(abld, nir_instr_as_ssa_undef(instr)); > break; > > case nir_instr_type_jump: > @@ -495,17 +500,12 @@ bool > fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr, > const fs_reg &result) > { > - if (instr->src[0].src.is_ssa || > - !instr->src[0].src.reg.reg || > - !instr->src[0].src.reg.reg->parent_instr) > - return false; > - > - if (instr->src[0].src.reg.reg->parent_instr->type != > - nir_instr_type_intrinsic) > + if (!instr->src[0].src.is_ssa || > + instr->src[0].src.ssa->parent_instr->type != nir_instr_type_intrinsic) > return false; > > nir_intrinsic_instr *src0 = > - nir_instr_as_intrinsic(instr->src[0].src.reg.reg->parent_instr); > + nir_instr_as_intrinsic(instr->src[0].src.ssa->parent_instr); > > if (src0->intrinsic != nir_intrinsic_load_front_face) > return false; > @@ -1146,6 +1146,25 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, > nir_alu_instr *instr) > } > } > > +void > +fs_visitor::nir_emit_load_const(const fs_builder &bld, > + nir_load_const_instr *instr) > +{ > + fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, instr->def.num_components); > + > + for (unsigned i = 0; i < instr->def.num_components; i++) > + bld.MOV(offset(reg, i), fs_reg(instr->value.i[i])); > + > + nir_ssa_values[instr->def.index] = reg; > +} > + > +void > +fs_visitor::nir_emit_undef(const fs_builder &bld, nir_ssa_undef_instr *instr) > +{ > + nir_ssa_values[instr->def.index] = bld.vgrf(BRW_REGISTER_TYPE_D, > + instr->def.num_components); > +} > + > static fs_reg > fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg, > unsigned base_offset, nir_src *indirect) > @@ -1171,30 +1190,30 @@ fs_reg_for_nir_reg(fs_visitor *v, nir_register > *nir_reg, > fs_reg > fs_visitor::get_nir_src(nir_src src) > { > + fs_reg reg; > if (src.is_ssa) { > - assert(src.ssa->parent_instr->type == nir_instr_type_load_const); > - nir_load_const_instr *load = > nir_instr_as_load_const(src.ssa->parent_instr); > - fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, src.ssa->num_components); > - > - for (unsigned i = 0; i < src.ssa->num_components; ++i) > - bld.MOV(offset(reg, i), fs_reg(load->value.i[i])); > - > - return reg;
I understand that moving this stuff to emit_load_const has a very nice unifying effect on the visitor. However, it also has a subtle effect on generated code that's worth at least documenting in the commit message. In particular, the MOV(dst, imm) is now in a different basic block than the instruction that was using it. I don't think it's a big deal, but you should put it in the commit message. Reviewed-by: Jason Ekstrand <jason.ekstr...@intel.com> > + reg = nir_ssa_values[src.ssa->index]; > } else { > - fs_reg reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset, > - src.reg.indirect); > - > - /* to avoid floating-point denorm flushing problems, set the type by > - * default to D - instructions that need floating point semantics will > set > - * this to F if they need to > - */ > - return retype(reg, BRW_REGISTER_TYPE_D); > + reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset, > + src.reg.indirect); > } > + > + /* to avoid floating-point denorm flushing problems, set the type by > + * default to D - instructions that need floating point semantics will set > + * this to F if they need to > + */ > + return retype(reg, BRW_REGISTER_TYPE_D); > } > > fs_reg > fs_visitor::get_nir_dest(nir_dest dest) > { > + if (dest.is_ssa) { > + nir_ssa_values[dest.ssa.index] = bld.vgrf(BRW_REGISTER_TYPE_F, > + dest.ssa.num_components); > + return nir_ssa_values[dest.ssa.index]; > + } > + > return fs_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset, > dest.reg.indirect); > } > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > index 9a4bad6..90f6219 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > @@ -2012,6 +2012,7 @@ fs_visitor::fs_visitor(const struct brw_compiler > *compiler, void *log_data, > this->no16_msg = NULL; > > this->nir_locals = NULL; > + this->nir_ssa_values = NULL; > this->nir_globals = NULL; > > memset(&this->payload, 0, sizeof(this->payload)); > diff --git a/src/mesa/drivers/dri/i965/brw_nir.c > b/src/mesa/drivers/dri/i965/brw_nir.c > index 3e154c1..d87e783 100644 > --- a/src/mesa/drivers/dri/i965/brw_nir.c > +++ b/src/mesa/drivers/dri/i965/brw_nir.c > @@ -156,7 +156,7 @@ brw_create_nir(struct brw_context *brw, > nir_print_shader(nir, stderr); > } > > - nir_convert_from_ssa(nir, true); > + nir_convert_from_ssa(nir, false); > nir_validate_shader(nir); > > /* This is the last pass we run before we start emitting stuff. It > diff --git a/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c > b/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c > index f0b018c..9eb0ed9 100644 > --- a/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c > +++ b/src/mesa/drivers/dri/i965/brw_nir_analyze_boolean_resolves.c > @@ -43,8 +43,8 @@ > static uint8_t > get_resolve_status_for_src(nir_src *src) > { > - nir_instr *src_instr = nir_src_get_parent_instr(src); > - if (src_instr) { > + if (src->is_ssa) { > + nir_instr *src_instr = src->ssa->parent_instr; > uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK; > > /* If the source instruction needs resolve, then from the perspective > @@ -66,8 +66,8 @@ get_resolve_status_for_src(nir_src *src) > static bool > src_mark_needs_resolve(nir_src *src, void *void_state) > { > - nir_instr *src_instr = nir_src_get_parent_instr(src); > - if (src_instr) { > + if (src->is_ssa) { > + nir_instr *src_instr = src->ssa->parent_instr; > uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK; > > /* If the source instruction is unresolved, then mark it as needing > @@ -172,11 +172,11 @@ analyze_boolean_resolves_block(nir_block *block, void > *void_state) > resolve_status = BRW_NIR_NON_BOOLEAN; > } > > - /* If the destination is SSA-like, go ahead allow unresolved > booleans. > + /* If the destination is SSA, go ahead allow unresolved booleans. > * If the destination register doesn't have a well-defined > parent_instr > * we need to resolve immediately. > */ > - if (alu->dest.dest.reg.reg->parent_instr == NULL && > + if (!alu->dest.dest.is_ssa && > resolve_status == BRW_NIR_BOOLEAN_UNRESOLVED) { > resolve_status = BRW_NIR_BOOLEAN_NEEDS_RESOLVE; > } > -- > 2.4.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev