On Mon, 2016-01-25 at 15:18 -0800, Matt Turner wrote: > The uint versions zero extend while the int versions sign extend. > --- > src/glsl/nir/nir.h | 3 +++ > src/glsl/nir/nir_opcodes.py | 9 +++++++++ > src/glsl/nir/nir_opt_algebraic.py | 16 ++++++++++++++++ > 3 files changed, 28 insertions(+) > > diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h > index 1113030..7b39cbb 100644 > --- a/src/glsl/nir/nir.h > +++ b/src/glsl/nir/nir.h > @@ -1471,6 +1471,9 @@ typedef struct nir_shader_compiler_options { > bool lower_pack_half_2x16; > bool lower_unpack_half_2x16; > > + bool lower_extract_byte; > + bool lower_extract_word; > + > /** > * Does the driver support real 32-bit integers? (Otherwise, integers > * are simulated by floats.) > diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py > index a8bbe1a..be3cd17 100644 > --- a/src/glsl/nir/nir_opcodes.py > +++ b/src/glsl/nir/nir_opcodes.py > @@ -536,6 +536,15 @@ dst.x = src0.x; > dst.y = src1.x; > """) > > +# Byte extraction > +binop("extract_ubyte", tuint, "", "(uint8_t)(src0 >> (src1 * 8))") > +binop("extract_ibyte", tint, "", "(int8_t)(src0 >> (src1 * 8))") > + > +# Word extraction > +binop("extract_uword", tuint, "", "(uint16_t)(src0 >> (src1 * 16))") > +binop("extract_iword", tint, "", "(int16_t)(src0 >> (src1 * 16))") > + > + > def triop(name, ty, const_expr): > opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr) > def triop_horiz(name, output_size, src1_size, src2_size, src3_size, > const_expr): > diff --git a/src/glsl/nir/nir_opt_algebraic.py > b/src/glsl/nir/nir_opt_algebraic.py > index 7745b76..b761b54 100644 > --- a/src/glsl/nir/nir_opt_algebraic.py > +++ b/src/glsl/nir/nir_opt_algebraic.py > @@ -242,6 +242,22 @@ optimizations = [ > ('bcsel', ('ult', 31, 'bits'), 'value', > ('ubfe', 'value', 'offset', 'bits')), > 'options->lower_bitfield_extract'), > + > + (('extract_ibyte', a, b), > + ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 8), > + 'options->lower_extract_byte'), > +
Is this correct? If I understand it right, extract_ibyte(0x00ff0000, 2) should select the byte 0xff, but with b = 2 this lowering shifts left by (3 - 2) * 8 = 8 and then right by 8, which computes (N << 8) >> 8 = N = 0x00ff0000. It looks like the final ishr should shift by 24 instead of 8. With this fixed: Reviewed-by: Iago Toral Quiroga <ito...@igalia.com> > + (('extract_ubyte', a, b), > + ('iand', ('ushr', a, ('imul', b, 8)), 0xff), > + 'options->lower_extract_byte'), > + > + (('extract_iword', a, b), > + ('ishr', ('ishl', a, ('imul', ('isub', 1, b), 16)), 16), > + 'options->lower_extract_word'), > + > + (('extract_uword', a, b), > + ('iand', ('ushr', a, ('imul', b, 16)), 0xffff), > + 'options->lower_extract_word'), > ] > > # Add optimizations to handle the case where the result of a ternary is _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev