On Mon, 2016-01-25 at 15:18 -0800, Matt Turner wrote: > --- > src/glsl/nir/nir.h | 4 ++++ > src/glsl/nir/nir_lower_alu_to_scalar.c | 32 ++++++++++++++++++++++++++++++++ > src/glsl/nir/nir_opcodes.py | 10 ++++++++++ > src/glsl/nir/nir_opt_algebraic.py | 20 ++++++++++++++++++++ > 4 files changed, 66 insertions(+) > > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > index 7b39cbb..bbd5b1a 100644 > --- a/src/glsl/nir/nir.h > +++ b/src/glsl/nir/nir.h > @@ -1469,6 +1469,10 @@ typedef struct nir_shader_compiler_options { > bool lower_ffract; > > bool lower_pack_half_2x16; > + bool lower_pack_unorm_2x16; > + bool lower_pack_snorm_2x16; > + bool lower_pack_unorm_4x8; > + bool lower_pack_snorm_4x8; > bool lower_unpack_half_2x16; > > bool lower_extract_byte; > diff --git a/src/glsl/nir/nir_lower_alu_to_scalar.c > b/src/glsl/nir/nir_lower_alu_to_scalar.c > index 5372fbe..37cb022 100644 > --- a/src/glsl/nir/nir_lower_alu_to_scalar.c > +++ b/src/glsl/nir/nir_lower_alu_to_scalar.c > @@ -134,6 +134,38 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder > *b) > return; > } > > + case nir_op_pack_uvec2_to_uint: { > + assert(b->shader->options->lower_pack_snorm_2x16 || > + b->shader->options->lower_pack_unorm_2x16);
So we only want opt_algebraic to generate these opcodes? Why? I see that this would be the case now, and it is true that there is probably no reason for other IR -> NIR translators to inject these opcodes directly without going through opt_algebraic, but do we need to prevent that with an assert? I am not against it anyway, just curious. > + nir_ssa_def *word = > + nir_extract_uword(b, instr->src[0].src.ssa, nir_imm_int(b, 0)); > + nir_ssa_def *val = > + nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)), > + nir_channel(b, word, 0)); > + > + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); > + nir_instr_remove(&instr->instr); > + break; > + } > + > + case nir_op_pack_uvec4_to_uint: { > + assert(b->shader->options->lower_pack_snorm_4x8 || > + b->shader->options->lower_pack_unorm_4x8); > + > + nir_ssa_def *byte = > + nir_extract_ubyte(b, instr->src[0].src.ssa, nir_imm_int(b, 0)); > + nir_ssa_def *val = > + nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), > nir_imm_int(b, 24)), > + nir_ishl(b, nir_channel(b, byte, 2), > nir_imm_int(b, 16))), > + nir_ior(b, nir_ishl(b, nir_channel(b, byte, 1), > nir_imm_int(b, 8)), > + nir_channel(b, byte, 0))); > + > + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); > + nir_instr_remove(&instr->instr); > + break; > + } > + > + case nir_op_fdph: { > nir_ssa_def *sum[4]; > for (unsigned i = 0; i < 3; i++) { > diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py > index be3cd17..3b82c3c 100644 > --- a/src/glsl/nir/nir_opcodes.py > +++ b/src/glsl/nir/nir_opcodes.py > @@ -237,6 +237,16 @@ unpack_2x16("unorm") > unpack_4x8("unorm") > unpack_2x16("half") > > +unop_horiz("pack_uvec2_to_uint", 0, tuint, 2, tuint, """ > +dst = (src0.x & 0xffff) | (src0.y >> 16); > +""") This should have 1 as the output size instead of 0. Also, the `src0.y >> 16` here looks like it should be `src0.y << 16` — packing presumably wants the second component in the most significant 16 bits, and a right shift would discard it instead.
> +unop_horiz("pack_uvec4_to_uint", 0, tuint, 4, tuint, """ > +dst = (src0.x << 0) | > + (src0.y << 8) | > + (src0.z << 16) | > + (src0.w << 24); > +""") Same here. > # Lowered floating point unpacking operations. > > diff --git a/src/glsl/nir/nir_opt_algebraic.py > b/src/glsl/nir/nir_opt_algebraic.py > index b761b54..56b0f5e 100644 > --- a/src/glsl/nir/nir_opt_algebraic.py > +++ b/src/glsl/nir/nir_opt_algebraic.py > @@ -258,6 +258,26 @@ optimizations = [ > (('extract_uword', a, b), > ('iand', ('ushr', a, ('imul', b, 16)), 0xffff), > 'options->lower_extract_word'), > + > + (('pack_unorm_2x16', 'v'), > + ('pack_uvec2_to_uint', > + ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))), > + 'options->lower_pack_unorm_2x16'), > + > + (('pack_unorm_4x8', 'v'), > + ('pack_uvec4_to_uint', > + ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))), > + 'options->lower_pack_unorm_4x8'), > + > + (('pack_snorm_2x16', 'v'), > + ('pack_uvec2_to_uint', > + ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), > 32767.0)))), > + 'options->lower_pack_snorm_2x16'), > + > + (('pack_snorm_4x8', 'v'), > + ('pack_uvec4_to_uint', > + ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), > 127.0)))), > + 'options->lower_pack_snorm_4x8'), I think the pack_snorm_* opcodes need an i2u conversion at the end. That's what the GLSL IR lowering is doing and also what the spec [1] seems to indicate: "The functions packUnorm2x16(), packSnorm2x16(), packUnorm4x8(), and packSnorm4x8() first convert each component of a two- or four-component vector of normalized floating-point values into 8- or 16-bit integer values. Then, the results are packed into a 32-bit unsigned integer. The first component of the vector will be written to the least significant bits of the output; the last component will be written to the most significant bits."
[1]https://www.opengl.org/registry/specs/ARB/shading_language_packing.txt > ] > > # Add optimizations to handle the case where the result of a ternary is _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev