Previously, vc4_nir_lower_io lowered direct uniform loads to dword offsets and indirect uniform loads to byte offsets. It simplifies things a bit to use byte offsets for everything and then divide by 4 in ntq_emit_intrinsic when we handle the direct uniform load. --- src/gallium/drivers/vc4/vc4_nir_lower_io.c | 17 +++++------------ src/gallium/drivers/vc4/vc4_program.c | 11 +++++++---- 2 files changed, 12 insertions(+), 16 deletions(-)
diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c index 1afe52a..e3d018f 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c @@ -338,8 +338,8 @@ static void vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b, nir_intrinsic_instr *intr) { - /* All TGSI-to-NIR uniform loads are vec4, but we may create dword - * loads in our lowering passes. + /* All TGSI-to-NIR uniform loads are vec4, but we need byte offsets + * in the backend. */ if (intr->num_components == 1) return; @@ -355,6 +355,9 @@ vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b, intr_comp->num_components = 1; nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, NULL); + /* Convert the base offset to bytes and add the component */ + intr_comp->const_index[0] = (intr->const_index[0] * 16 + i * 4); + if (intr->intrinsic == nir_intrinsic_load_uniform_indirect) { /* Convert the variable TGSI register index to a byte * offset. @@ -363,16 +366,6 @@ vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b, nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa, nir_imm_int(b, 4))); - - /* Convert the offset to be a byte index, too. */ - intr_comp->const_index[0] = (intr->const_index[0] * 16 + - i * 4); - } else { - /* We want a dword index for non-indirect uniform - * loads. 
- */ - intr_comp->const_index[0] = (intr->const_index[0] * 4 + - i); } dests[i] = &intr_comp->dest.ssa; diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 081adfd..5ce1143 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1440,6 +1440,7 @@ static void ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) { const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; + int dword_offset; struct qreg *dest = NULL; if (info->has_dest) { @@ -1449,11 +1450,13 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_load_uniform: assert(instr->num_components == 1); - if (instr->const_index[0] < VC4_NIR_STATE_UNIFORM_OFFSET) { - *dest = qir_uniform(c, QUNIFORM_UNIFORM, - instr->const_index[0]); + /* The offset is in bytes, but we need dwords */ + assert(instr->const_index[0] % 4 == 0); + dword_offset = instr->const_index[0] / 4; + if (dword_offset < VC4_NIR_STATE_UNIFORM_OFFSET) { + *dest = qir_uniform(c, QUNIFORM_UNIFORM, dword_offset); } else { - *dest = qir_uniform(c, instr->const_index[0] - + *dest = qir_uniform(c, dword_offset - VC4_NIR_STATE_UNIFORM_OFFSET, 0); } -- 2.5.0.400.gff86faf _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev