we store those arrays in local memory and reserve some space for each of the arrays. The arrays are stored in a packed format, because we know quite easily the context of each index. We don't do that in TGSI so far.
This causes various issues to come up in the MemoryOpt pass, because ld/st with indirects aren't guarenteed to be aligned to 0x10 anymore. v3: use fixed size vec4 arrays until we fix MemoryOpt v4: fix for 64 bit types v5: use loadFrom helper Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 58 ++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index f594e299645..e73f4ae1e36 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -80,6 +80,7 @@ private: typedef decltype(nir_ssa_def().index) NirSSADefIdx; typedef decltype(nir_ssa_def().bit_size) NirSSADefBitSize; typedef std::unordered_map<NirSSADefIdx, LValues> NirDefMap; + typedef std::unordered_map<NirSSADefIdx, uint32_t> NirArrayLMemOffsets; typedef std::unordered_map<decltype(nir_block().index), BasicBlock*> NirBlockMap; TexTarget convert(glsl_sampler_dim, bool isArray, bool isShadow); @@ -157,6 +158,7 @@ private: NirDefMap ssaDefs; NirDefMap regDefs; + NirArrayLMemOffsets regToLmemOffset; NirBlockMap blocks; unsigned int curLoopDepth; @@ -1259,6 +1261,7 @@ Converter::storeTo(nir_intrinsic_instr *insn, DataFile file, operation op, bool Converter::parseNIR() { + info->bin.tlsSpace = 0; info->io.clipDistances = nir->info.clip_distance_array_size; info->io.cullDistances = nir->info.cull_distance_array_size; @@ -1348,6 +1351,16 @@ Converter::visit(nir_function *function) break; } + nir_foreach_register(reg, &function->impl->registers) { + if (reg->num_array_elems) { + /* TODO: packed variables would be nice, but MemoryOpt fails */ + /* replace 4 with reg->num_components */ + uint32_t size = 4 * reg->num_array_elems * (reg->bit_size / 8); + regToLmemOffset[reg->index] = info->bin.tlsSpace; + info->bin.tlsSpace += size; + } + } + nir_index_ssa_defs(function->impl); foreach_list_typed(nir_cf_node, node, node, &function->impl->body) { if (!visit(node)) @@ -2035,6 +2048,51 @@ Converter::visit(nir_alu_instr *insn) * 2. they basically just merge multiple values into one data type */ CASE_OPFI(mov): + if (!insn->dest.dest.is_ssa && insn->dest.dest.reg.reg->num_array_elems) { + nir_reg_dest& reg = insn->dest.dest.reg; + auto goffset = regToLmemOffset[reg.reg->index]; + auto comps = reg.reg->num_components; + auto size = reg.reg->bit_size / 8; + auto csize = 4 * size; /* TODO after fixing MemoryOpts: comps * size; */ + auto aoffset = csize * reg.base_offset; + Value *indirect = nullptr; + + if (reg.indirect) + indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), + getSrc(reg.indirect, 0), mkImm(csize)); + + for (auto i = 0u; i < comps; ++i) { + if (!((1u << i) & insn->dest.write_mask)) + continue; + + Symbol *sym = mkSymbol(FILE_MEMORY_LOCAL, 0, dType, goffset + aoffset + i * size); + mkStore(OP_STORE, dType, sym, indirect, getSrc(&insn->src[0], i)); + } + break; + } else if (!insn->src[0].src.is_ssa && insn->src[0].src.reg.reg->num_array_elems) { + LValues &newDefs = convert(&insn->dest); + nir_reg_src& reg = insn->src[0].src.reg; + auto goffset = regToLmemOffset[reg.reg->index]; + /* auto comps = reg.reg->num_components; */ + auto size = reg.reg->bit_size / 8; + auto csize = 4 * size; /* TODO after fixing MemoryOpts: comps * size; */ + auto aoffset = csize * reg.base_offset; + Value *indirect = nullptr; + + if (reg.indirect) + indirect = mkOp2v(OP_MUL, TYPE_U32, getSSA(4, FILE_ADDRESS), getSrc(reg.indirect, 0), mkImm(csize)); + + for (auto i = 0u; i < newDefs.size(); ++i) + loadFrom(FILE_MEMORY_LOCAL, 0, dType, newDefs[i], goffset + aoffset, i, indirect); + + break; + } else { + LValues &newDefs = convert(&insn->dest); + for (LValues::size_type c = 0u; c < newDefs.size(); ++c) { + mkMov(newDefs[c], getSrc(&insn->src[0], c), dType); + } + } + break; case nir_op_vec2: case nir_op_vec3: case nir_op_vec4: { -- 2.14.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev