From: Rob Clark <robcl...@freedesktop.org> The first pass would be used if you are not using nir_lower_indirect_derefs (which would be the case for ir3, since we do have indirect register access). It simply ensures there are no indirect accesses to inputs/outputs.
The second pass splits up array/matrix/struct inputs/outputs into primitive types. In the short term, we need this for gallium, since unread elements of a VS shader input (at granularity of vec4) will not be assigned space in the VBO. But Jason mentioned that it would also be useful for implementing linking in NIR. There are a few open TODO's in the 2nd patch, mostly because I wasn't really sure how i965 would like things to look in the type_size_scalar case. For an example of what this looks like in practice: varying vec4 m1[4]; varying vec4 m2[4]; attribute vec4 a1[4]; uniform int index; void main() { gl_Position = a1[0]; m1[0] = a1[0]; m1[1] = a1[1]; m1[2] = a1[2]; m1[3] = a1[0]; m2[0] = a1[index]; m2[1] = a1[index]; m2[2] = a1[index]; m2[3] = a1[index]; } becomes: shader: MESA_SHADER_VERTEX decl_var uniform INTERP_QUALIFIER_NONE int index (0, 0) decl_var shader_in INTERP_QUALIFIER_NONE vec4[4] a1 (VERT_ATTRIB_GENERIC0, 0) decl_var shader_out INTERP_QUALIFIER_NONE vec4 gl_Position (VARYING_SLOT_POS, 0) decl_var shader_out INTERP_QUALIFIER_NONE vec4[4] m1 (VARYING_SLOT_VAR0, 0) decl_var shader_out INTERP_QUALIFIER_NONE vec4[4] m2 (VARYING_SLOT_VAR4, 0) decl_function main returning void impl main { block block_0: /* preds: */ vec4 ssa_0 = intrinsic load_var () (a1[0]) () vec4 ssa_1 = intrinsic load_var () (a1[0]) () vec4 ssa_2 = intrinsic load_var () (a1[1]) () vec4 ssa_3 = intrinsic load_var () (a1[2]) () vec4 ssa_4 = intrinsic load_var () (a1[0]) () vec1 ssa_5 = load_const (0x00000000 /* 0.000000 */) vec1 ssa_6 = intrinsic load_uniform (ssa_5) () (0) /* base=0 */ /* index */ vec4 ssa_7 = intrinsic load_var () (a1[ssa_6]) () vec1 ssa_8 = load_const (0x00000000 /* 0.000000 */) vec1 ssa_9 = intrinsic load_uniform (ssa_8) () (0) /* base=0 */ /* index */ vec4 ssa_10 = intrinsic load_var () (a1[ssa_9]) () vec1 ssa_11 = load_const (0x00000000 /* 0.000000 */) vec1 ssa_12 = intrinsic load_uniform (ssa_11) () (0) /* base=0 */ /* index */ vec4 ssa_13 = intrinsic load_var 
() (a1[ssa_12]) () vec1 ssa_14 = load_const (0x00000000 /* 0.000000 */) vec1 ssa_15 = intrinsic load_uniform (ssa_14) () (0) /* base=0 */ /* index */ vec4 ssa_16 = intrinsic load_var () (a1[ssa_15]) () intrinsic store_var (ssa_0) (gl_Position) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_1) (m1[0]) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_2) (m1[1]) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_3) (m1[2]) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_4) (m1[3]) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_7) (m2[0]) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_10) (m2[1]) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_13) (m2[2]) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_16) (m2[3]) (15) /* wrmask=xyzw */ /* succs: block_1 */ block block_1: } and then after lower_indirect_io: shader: MESA_SHADER_VERTEX decl_var uniform INTERP_QUALIFIER_NONE int index (0, 0) decl_var shader_in INTERP_QUALIFIER_NONE vec4[4] a1 (VERT_ATTRIB_GENERIC0, 0) decl_var shader_out INTERP_QUALIFIER_NONE vec4 gl_Position (VARYING_SLOT_POS, 0) decl_var shader_out INTERP_QUALIFIER_NONE vec4[4] m1 (VARYING_SLOT_VAR0, 0) decl_var shader_out INTERP_QUALIFIER_NONE vec4[4] m2 (VARYING_SLOT_VAR4, 0) decl_function main returning void impl main { decl_var INTERP_QUALIFIER_NONE vec4[4] shadow_a1 block block_0: /* preds: */ intrinsic copy_var () (shadow_a1, a1) () vec4 ssa_0 = intrinsic load_var () (shadow_a1[0]) () vec4 ssa_1 = intrinsic load_var () (shadow_a1[0]) () vec4 ssa_2 = intrinsic load_var () (shadow_a1[1]) () vec4 ssa_3 = intrinsic load_var () (shadow_a1[2]) () vec4 ssa_4 = intrinsic load_var () (shadow_a1[0]) () vec1 ssa_5 = load_const (0x00000000 /* 0.000000 */) vec1 ssa_6 = intrinsic load_uniform (ssa_5) () (0) /* base=0 */ /* index */ vec4 ssa_7 = intrinsic load_var () (shadow_a1[ssa_6]) () vec1 ssa_8 = load_const (0x00000000 /* 0.000000 */) vec1 ssa_9 = intrinsic load_uniform (ssa_8) () (0) /* base=0 */ /* index */ vec4 ssa_10 = intrinsic load_var () 
(shadow_a1[ssa_9]) () vec1 ssa_11 = load_const (0x00000000 /* 0.000000 */) vec1 ssa_12 = intrinsic load_uniform (ssa_11) () (0) /* base=0 */ /* index */ vec4 ssa_13 = intrinsic load_var () (shadow_a1[ssa_12]) () vec1 ssa_14 = load_const (0x00000000 /* 0.000000 */) vec1 ssa_15 = intrinsic load_uniform (ssa_14) () (0) /* base=0 */ /* index */ vec4 ssa_16 = intrinsic load_var () (shadow_a1[ssa_15]) () intrinsic store_var (ssa_0) (gl_Position) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_1) (m1[0]) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_2) (m1[1]) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_3) (m1[2]) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_4) (m1[3]) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_7) (m2[0]) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_10) (m2[1]) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_13) (m2[2]) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_16) (m2[3]) (15) /* wrmask=xyzw */ /* succs: block_1 */ block block_1: } and then after lower_io_types: shader: MESA_SHADER_VERTEX decl_var uniform INTERP_QUALIFIER_NONE int index (0, 0) decl_var shader_in INTERP_QUALIFIER_NONE vec4 a1_0 (VERT_ATTRIB_GENERIC0, 0) decl_var shader_in INTERP_QUALIFIER_NONE vec4 a1_1 (VERT_ATTRIB_GENERIC1, 0) decl_var shader_in INTERP_QUALIFIER_NONE vec4 a1_2 (VERT_ATTRIB_GENERIC2, 0) decl_var shader_in INTERP_QUALIFIER_NONE vec4 a1_3 (VERT_ATTRIB_GENERIC3, 0) decl_var shader_out INTERP_QUALIFIER_NONE vec4 gl_Position (VARYING_SLOT_POS, 0) decl_var shader_out INTERP_QUALIFIER_NONE vec4 m1_0 (VARYING_SLOT_VAR0, 0) decl_var shader_out INTERP_QUALIFIER_NONE vec4 m1_1 (VARYING_SLOT_VAR1, 0) decl_var shader_out INTERP_QUALIFIER_NONE vec4 m1_2 (VARYING_SLOT_VAR2, 0) decl_var shader_out INTERP_QUALIFIER_NONE vec4 m1_3 (VARYING_SLOT_VAR3, 0) decl_var shader_out INTERP_QUALIFIER_NONE vec4 m2_0 (VARYING_SLOT_VAR4, 0) decl_var shader_out INTERP_QUALIFIER_NONE vec4 m2_1 (VARYING_SLOT_VAR5, 0) decl_var shader_out INTERP_QUALIFIER_NONE vec4 m2_2 
(VARYING_SLOT_VAR6, 0) decl_var shader_out INTERP_QUALIFIER_NONE vec4 m2_3 (VARYING_SLOT_VAR7, 0) decl_function main returning void impl main { decl_var INTERP_QUALIFIER_NONE vec4[4] shadow_a1 block block_0: /* preds: */ vec4 ssa_17 = intrinsic load_var () (a1_0) () intrinsic store_var (ssa_17) (shadow_a1[0]) (15) /* wrmask=xyzw */ vec4 ssa_18 = intrinsic load_var () (a1_1) () intrinsic store_var (ssa_18) (shadow_a1[1]) (15) /* wrmask=xyzw */ vec4 ssa_19 = intrinsic load_var () (a1_2) () intrinsic store_var (ssa_19) (shadow_a1[2]) (15) /* wrmask=xyzw */ vec4 ssa_20 = intrinsic load_var () (a1_3) () intrinsic store_var (ssa_20) (shadow_a1[3]) (15) /* wrmask=xyzw */ vec4 ssa_0 = intrinsic load_var () (shadow_a1[0]) () vec4 ssa_1 = intrinsic load_var () (shadow_a1[0]) () vec4 ssa_2 = intrinsic load_var () (shadow_a1[1]) () vec4 ssa_3 = intrinsic load_var () (shadow_a1[2]) () vec4 ssa_4 = intrinsic load_var () (shadow_a1[0]) () vec1 ssa_5 = load_const (0x00000000 /* 0.000000 */) vec1 ssa_6 = intrinsic load_uniform (ssa_5) () (0) /* base=0 */ /* index */ vec4 ssa_7 = intrinsic load_var () (shadow_a1[ssa_6]) () vec1 ssa_8 = load_const (0x00000000 /* 0.000000 */) vec1 ssa_9 = intrinsic load_uniform (ssa_8) () (0) /* base=0 */ /* index */ vec4 ssa_10 = intrinsic load_var () (shadow_a1[ssa_9]) () vec1 ssa_11 = load_const (0x00000000 /* 0.000000 */) vec1 ssa_12 = intrinsic load_uniform (ssa_11) () (0) /* base=0 */ /* index */ vec4 ssa_13 = intrinsic load_var () (shadow_a1[ssa_12]) () vec1 ssa_14 = load_const (0x00000000 /* 0.000000 */) vec1 ssa_15 = intrinsic load_uniform (ssa_14) () (0) /* base=0 */ /* index */ vec4 ssa_16 = intrinsic load_var () (shadow_a1[ssa_15]) () intrinsic store_var (ssa_0) (gl_Position) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_1) (m1_0) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_2) (m1_1) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_3) (m1_2) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_4) (m1_3) (15) /* wrmask=xyzw */ intrinsic 
store_var (ssa_7) (m2_0) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_10) (m2_1) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_13) (m2_2) (15) /* wrmask=xyzw */ intrinsic store_var (ssa_16) (m2_3) (15) /* wrmask=xyzw */ /* succs: block_1 */ block block_1: } Rob Clark (2): nir: lower-indirect-io pass nir: lower-io-types pass src/compiler/Makefile.sources | 2 + src/compiler/nir/nir.h | 3 + src/compiler/nir/nir_lower_indirect_io.c | 151 ++++++++++++++++++++++++++ src/compiler/nir/nir_lower_io_types.c | 178 +++++++++++++++++++++++++++++++ 4 files changed, 334 insertions(+) create mode 100644 src/compiler/nir/nir_lower_indirect_io.c create mode 100644 src/compiler/nir/nir_lower_io_types.c -- 2.5.5 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev