On Jan 28, 2016 09:22, "Matt Turner" <matts...@gmail.com> wrote: > > On Thu, Jan 28, 2016 at 12:32 AM, Iago Toral <ito...@igalia.com> wrote: > > On Mon, 2016-01-25 at 15:18 -0800, Matt Turner wrote: > >> --- > >> src/glsl/nir/nir.h | 4 ++++ > >> src/glsl/nir/nir_lower_alu_to_scalar.c | 32 ++++++++++++++++++++++++++++++++ > >> src/glsl/nir/nir_opcodes.py | 10 ++++++++++ > >> src/glsl/nir/nir_opt_algebraic.py | 20 ++++++++++++++++++++ > >> 4 files changed, 66 insertions(+) > >> > >> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > >> index 7b39cbb..bbd5b1a 100644 > >> --- a/src/glsl/nir/nir.h > >> +++ b/src/glsl/nir/nir.h > >> @@ -1469,6 +1469,10 @@ typedef struct nir_shader_compiler_options { > >> bool lower_ffract; > >> > >> bool lower_pack_half_2x16; > >> + bool lower_pack_unorm_2x16; > >> + bool lower_pack_snorm_2x16; > >> + bool lower_pack_unorm_4x8; > >> + bool lower_pack_snorm_4x8; > >> bool lower_unpack_half_2x16; > >> > >> bool lower_extract_byte; > >> diff --git a/src/glsl/nir/nir_lower_alu_to_scalar.c b/src/glsl/nir/nir_lower_alu_to_scalar.c > >> index 5372fbe..37cb022 100644 > >> --- a/src/glsl/nir/nir_lower_alu_to_scalar.c > >> +++ b/src/glsl/nir/nir_lower_alu_to_scalar.c > >> @@ -134,6 +134,38 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) > >> return; > >> } > >> > >> + case nir_op_pack_uvec2_to_uint: { > >> + assert(b->shader->options->lower_pack_snorm_2x16 || > >> + b->shader->options->lower_pack_unorm_2x16); > > > > So we only want opt_algebraic to generate these opcodes? Why? I see that > > this would be the case now, and it is true that there is probably no > > reason for other IR -> NIR translators to inject these opcodes directly > > without going through opt_algebraic, but do we need to prevent that with > > an assert? > > Not really necessary to assert, but I certainly don't expect there to > be other uses of these opcodes. > > > I am not against anyway, just curious. > > > >> + nir_ssa_def *word = > >> + nir_extract_uword(b, instr->src[0].src.ssa, nir_imm_int(b, 0)); > >> + nir_ssa_def *val = > >> + nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)), > >> + nir_channel(b, word, 0)); > >> + > >> + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); > >> + nir_instr_remove(&instr->instr); > >> + break; > >> + } > >> + > >> + case nir_op_pack_uvec4_to_uint: { > >> + assert(b->shader->options->lower_pack_snorm_4x8 || > >> + b->shader->options->lower_pack_unorm_4x8); > >> + > >> + nir_ssa_def *byte = > >> + nir_extract_ubyte(b, instr->src[0].src.ssa, nir_imm_int(b, 0)); > >> + nir_ssa_def *val = > >> + nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)), > >> + nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))), > >> + nir_ior(b, nir_ishl(b, nir_channel(b, byte, 1), nir_imm_int(b, 8)), > >> + nir_channel(b, byte, 0))); > >> + > >> + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); > >> + nir_instr_remove(&instr->instr); > >> + break; > >> + } > >> + > >> case nir_op_fdph: { > >> nir_ssa_def *sum[4]; > >> for (unsigned i = 0; i < 3; i++) { > >> diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py > >> index be3cd17..3b82c3c 100644 > >> --- a/src/glsl/nir/nir_opcodes.py > >> +++ b/src/glsl/nir/nir_opcodes.py > >> @@ -237,6 +237,16 @@ unpack_2x16("unorm") > >> unpack_4x8("unorm") > >> unpack_2x16("half") > >> > >> +unop_horiz("pack_uvec2_to_uint", 0, tuint, 2, tuint, """ > >> +dst = (src0.x & 0xffff) | (src0.y >> 16); > >> +""") > > > > This should've 1 as the output size instead of 0. > > I think there's a subtle (and perhaps meaningless) distinction between > 0 and 1. I think that 0 gives a scalar (uint) while 1 gives a > 1-component vector. The only difference I've seen is the use of dst vs > dst.x in the constant evaluation code. > > Do you know of any other differences or a reason to prefer 1 over 0?
That's not quite correct. 1 means exactly one component while a destination size of 0 means that it's a vectorizable operation. All 0-sized sources automatically take on the size of the destination. This is how most standard ALU opcodes work. > >> +unop_horiz("pack_uvec4_to_uint", 0, tuint, 4, tuint, """ > >> +dst = (src0.x << 0) | > >> + (src0.y << 8) | > >> + (src0.z << 16) | > >> + (src0.w << 24); > >> +""") > > > > Same here. > > > > > >> # Lowered floating point unpacking operations. > >> > >> diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py > >> index b761b54..56b0f5e 100644 > >> --- a/src/glsl/nir/nir_opt_algebraic.py > >> +++ b/src/glsl/nir/nir_opt_algebraic.py > >> @@ -258,6 +258,26 @@ optimizations = [ > >> (('extract_uword', a, b), > >> ('iand', ('ushr', a, ('imul', b, 16)), 0xffff), > >> 'options->lower_extract_word'), > >> + > >> + (('pack_unorm_2x16', 'v'), > >> + ('pack_uvec2_to_uint', > >> + ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))), > >> + 'options->lower_pack_unorm_2x16'), > >> + > >> + (('pack_unorm_4x8', 'v'), > >> + ('pack_uvec4_to_uint', > >> + ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))), > >> + 'options->lower_pack_unorm_4x8'), > >> + > >> + (('pack_snorm_2x16', 'v'), > >> + ('pack_uvec2_to_uint', > >> + ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))), > >> + 'options->lower_pack_snorm_2x16'), > >> + > >> + (('pack_snorm_4x8', 'v'), > >> + ('pack_uvec4_to_uint', > >> + ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))), > >> + 'options->lower_pack_snorm_4x8'), > > > > I think the pack_snorm_* opcodes need a i2u conversion at the end. > > That's what the GLSL IR lowering is doing and also what the spec [1] > > seems to indicate: > > Right, but since NIR operands are typeless, there's nothing to do (NIR > doesn't even have i2u/u2i). > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev