On Mon, Jan 11, 2016 at 7:20 PM, Ilia Mirkin <imir...@alum.mit.edu> wrote: > On Mon, Jan 11, 2016 at 7:12 PM, Matt Turner <matts...@gmail.com> wrote: >> From: Kenneth Graunke <kenn...@whitecape.org> >> >> We would like to be able to combine >> >> result.x = bitfieldInsert(src0.x, src1.x, src2.x, src3.x); >> result.y = bitfieldInsert(src0.y, src1.y, src2.y, src3.y); >> result.z = bitfieldInsert(src0.z, src1.z, src2.z, src3.z); >> result.w = bitfieldInsert(src0.w, src1.w, src2.w, src3.w); >> >> into a single ivec4 bitfieldInsert operation. This should be possible >> with most drivers. >> >> This patch changes the offset and bits parameters from scalar ints >> to ivecN or uvecN. The type of all four operands will be the same, >> for simplicity. >> >> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> >> Reviewed-by: Matt Turner <matts...@gmail.com> >> --- >> [mattst88 v3]: Make some constants unsigned in >> src/glsl/lower_packing_builtins.cpp >> >> src/glsl/builtin_functions.cpp | 8 +++++++- >> src/glsl/ir.h | 1 - >> src/glsl/ir_constant_expression.cpp | 6 +++--- >> src/glsl/ir_validate.cpp | 5 +++-- >> src/glsl/lower_instructions.cpp | 8 ++++---- >> src/glsl/lower_packing_builtins.cpp | 10 +++++----- >> src/glsl/nir/nir_opcodes.py | 4 ++-- >> 7 files changed, 24 insertions(+), 18 deletions(-) >> >> diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp >> index 602852a..38383bc 100644 >> --- a/src/glsl/builtin_functions.cpp >> +++ b/src/glsl/builtin_functions.cpp >> @@ -4902,13 +4902,19 @@ builtin_builder::_bitfieldExtract(const glsl_type >> *type) >> ir_function_signature * >> builtin_builder::_bitfieldInsert(const glsl_type *type) >> { >> + bool is_uint = type->base_type == GLSL_TYPE_UINT; >> ir_variable *base = in_var(type, "base"); >> ir_variable *insert = in_var(type, "insert"); >> ir_variable *offset = in_var(glsl_type::int_type, "offset"); >> ir_variable *bits = in_var(glsl_type::int_type, "bits"); >> MAKE_SIG(type, gpu_shader5_or_es31, 4, base, insert, offset, bits); >> >> - body.emit(ret(bitfield_insert(base, insert, offset, bits))); >> + operand cast_offset = is_uint ? i2u(offset) : operand(offset); >> + operand cast_bits = is_uint ? i2u(bits) : operand(bits); >> + >> + body.emit(ret(bitfield_insert(base, insert, >> + swizzle(cast_offset, SWIZZLE_XXXX, type->vector_elements), >> + swizzle(cast_bits, SWIZZLE_XXXX, type->vector_elements)))); >> >> return sig; >> } >> diff --git a/src/glsl/ir.h b/src/glsl/ir.h >> index a2eb508..9af2fc1 100644 >> --- a/src/glsl/ir.h >> +++ b/src/glsl/ir.h >> @@ -1710,7 +1710,6 @@ public: >> operation == ir_triop_vector_insert || >> operation == ir_quadop_vector || >> /* TODO: these can't currently be vectorized */ >> - operation == ir_quadop_bitfield_insert || >> operation == ir_triop_bitfield_extract; >> } >> >> diff --git a/src/glsl/ir_constant_expression.cpp >> b/src/glsl/ir_constant_expression.cpp >> index 38b6dd5..f5b5bd8 100644 >> --- a/src/glsl/ir_constant_expression.cpp >> +++ b/src/glsl/ir_constant_expression.cpp >> @@ -1710,10 +1710,10 @@ ir_expression::constant_expression_value(struct >> hash_table *variable_context) >> } >> >> case ir_quadop_bitfield_insert: { >> - int offset = op[2]->value.i[0]; >> - int bits = op[3]->value.i[0]; >> - >> for (unsigned c = 0; c < components; c++) { >> + int offset = op[2]->value.i[c]; >> + int bits = op[3]->value.i[c]; >> + >> if (bits == 0) >> data.u[c] = op[0]->value.u[c]; >> else if (offset < 0 || bits < 0) >> diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp >> index a4d6182..fea9b76 100644 >> --- a/src/glsl/ir_validate.cpp >> +++ b/src/glsl/ir_validate.cpp >> @@ -647,10 +647,11 @@ ir_validate::visit_leave(ir_expression *ir) >> break; >> >> case ir_quadop_bitfield_insert: >> + assert(ir->type->is_integer()); >> assert(ir->operands[0]->type == ir->type); >> assert(ir->operands[1]->type == ir->type); >> - assert(ir->operands[2]->type == glsl_type::int_type); >> - assert(ir->operands[3]->type == glsl_type::int_type); >> + assert(ir->operands[2]->type == ir->type); >> + assert(ir->operands[3]->type == ir->type); >> break; >> >> case ir_quadop_vector: >> diff --git a/src/glsl/lower_instructions.cpp >> b/src/glsl/lower_instructions.cpp >> index f70db87..d140be3 100644 >> --- a/src/glsl/lower_instructions.cpp >> +++ b/src/glsl/lower_instructions.cpp >> @@ -381,8 +381,8 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression >> *ir) >> >> ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem); >> >> - ir_constant *exp_shift = new(ir) ir_constant(23); >> - ir_constant *exp_width = new(ir) ir_constant(8); >> + ir_constant *exp_shift = new(ir) ir_constant(23, vec_elem); >> + ir_constant *exp_width = new(ir) ir_constant(8, vec_elem); >> >> /* Temporary variables */ >> ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary); >> @@ -470,8 +470,8 @@ >> lower_instructions_visitor::dldexp_to_arith(ir_expression *ir) >> >> ir_constant *sign_mask = new(ir) ir_constant(0x80000000u); >> >> - ir_constant *exp_shift = new(ir) ir_constant(20); >> - ir_constant *exp_width = new(ir) ir_constant(11); >> + ir_constant *exp_shift = new(ir) ir_constant(20, vec_elem); >> + ir_constant *exp_width = new(ir) ir_constant(11, vec_elem); >> ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem); >> >> /* Temporary variables */ >> diff --git a/src/glsl/lower_packing_builtins.cpp >> b/src/glsl/lower_packing_builtins.cpp >> index c8bf68b..19eeaa3 100644 >> --- a/src/glsl/lower_packing_builtins.cpp >> +++ b/src/glsl/lower_packing_builtins.cpp >> @@ -230,8 +230,8 @@ private: >> if (op_mask & LOWER_PACK_USE_BFI) { >> return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)), >> swizzle_y(u), >> - constant(16), >> - constant(16)); >> + constant(16u), >> + constant(16u)); >> } >> >> /* return (u.y << 16) | (u.x & 0xffff); */ >> @@ -261,9 +261,9 @@ private: >> return bitfield_insert(bitfield_insert( >> bitfield_insert( >> bit_and(swizzle_x(u), >> constant(0xffu)), >> - swizzle_y(u), constant(8), >> constant(8)), >> - swizzle_z(u), constant(16), constant(8)), >> - swizzle_w(u), constant(24), constant(8)); >> + swizzle_y(u), constant(8u), >> constant(8u)), >> + swizzle_z(u), constant(16u), >> constant(8u)), >> + swizzle_w(u), constant(24u), constant(8u)); >> } >> >> /* uvec4 u = UVEC4_RVAL & 0xff */ >> diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py >> index 398ae50..3780628 100644 >> --- a/src/glsl/nir/nir_opcodes.py >> +++ b/src/glsl/nir/nir_opcodes.py >> @@ -609,10 +609,10 @@ def quadop_horiz(name, output_size, src1_size, >> src2_size, src3_size, >> [tuint, tuint, tuint, tuint], >> "", const_expr) >> >> -opcode("bitfield_insert", 0, tuint, [0, 0, 1, 1], >> +opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0], >> [tuint, tuint, tint, tint], "", """ >> unsigned base = src0, insert = src1; >> -int offset = src2.x, bits = src3.x; >> +int offset = src2, bits = src3; >> if (bits == 0) { >> dst = 0; >> } else if (offset < 0 || bits < 0 || bits + offset > 32) { > > If the sole purpose of this is documentation, I guess it's good > enough... but really it's "for each channel" sort of thing now. > Otherwise this patch is > > Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu>
It's not just for documentation -- it's used to generate the constant folding code for bitfield_insert. But the constant folding stuff is smart enough to see that the size above is set to 0 (which is changed in this patch as well), which means "per-component," and it will loop over each component and set src2 and src3 (as well as src0 and src1) to the current component. > >> -- >> 2.4.9 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev