--- src/glsl/ir_optimization.h | 1 + src/glsl/lower_instructions.cpp | 82 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 0 deletions(-)
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 445dc49..c5405e5 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -39,6 +39,7 @@ #define LRP_TO_ARITH 0x80 #define BITFIELD_INSERT_TO_BFM_BFI 0x100 #define BITFIELD_INSERT_TO_BFM_BITOPS 0x200 +#define BFE_TO_BITOPS 0x400 /** * \see class lower_packing_builtins_visitor diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index 1c1cad8..d49c419 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -40,6 +40,7 @@ * - LRP_TO_ARITH * - BITFIELD_INSERT_TO_BFM_BFI * - BITFIELD_INSERT_TO_BFM_BITOPS + * - BFE_TO_BITOPS * * SUB_TO_ADD_NEG: * --------------- @@ -106,6 +107,10 @@ * * It's difficult to use vector three-source instructions in i965's vertex * shader, so don't emit ir_triop_bfi, but rather bit operations. + * + * BFE_TO_BITOPS: + * -------------- + * Breaks ir_triop_bitfield_extract into bit operations (and, or, shift). */ #include "main/core.h" /* for M_LOG2E */ @@ -138,6 +143,7 @@ private: void lrp_to_arith(ir_expression *); void bitfield_insert_to_bfm_bfi(ir_expression *); void bitfield_insert_to_bfm_bitops(ir_expression *ir); + void bfe_to_bitops(ir_expression *); }; /** @@ -394,6 +400,77 @@ void lower_instructions_visitor::bitfield_insert_to_bfm_bitops(ir_expression *ir ir->operands[3] = NULL; } +void +lower_instructions_visitor::bfe_to_bitops(ir_expression *ir) +{ + /* Translates + * (bfe value offset bits) + * into + * (asr (shl value (- (- 32 bits) offset)) (- 32 bits)) + * + * which more simply is + * value <<= 32 - bits - offset; + * value >>= 32 - bits; // >>= is ASR. + * + * but some hardware (like i965) can only shift by 0-31, so the corner case + * of bits == 0 leads to shifting by 0 instead of 32. Instead, do + * if (bits == 0) + * result = 0; + * else { + * value <<= 32 - bits - offset; + * value >>= 32 - bits; // >>= is ASR. + * result = value; + * } + */ + + /* TODO: + * - Allow hardware that can shift by 32 to avoid the branch. + * - Allow skipping the lowering pass if type of <value> is scalar. + * - Emit (value >> offset) & (1 << bits) - 1) for unsigned <value>s. + */ + + ir_variable *value = new(ir) ir_variable(ir->operands[0]->type, "bfe_value", + ir_var_temporary); + this->base_ir->insert_before(value); + this->base_ir->insert_before(assign(value, ir->operands[0])); + + ir_variable *offset = new(ir) ir_variable(ir->operands[1]->type, "bfe_offset", + ir_var_temporary); + this->base_ir->insert_before(offset); + this->base_ir->insert_before(assign(offset, ir->operands[1])); + + ir_variable *bits = new(ir) ir_variable(ir->operands[2]->type, "bfe_bits", + ir_var_temporary); + this->base_ir->insert_before(bits); + this->base_ir->insert_before(assign(bits, ir->operands[2])); + + ir_constant *immed_32 = new(ir) ir_constant(32); + ir_variable *width_minus_bits = new(ir) ir_variable(glsl_type::int_type, + "width_minus_bits", + ir_var_temporary); + this->base_ir->insert_before(width_minus_bits); + this->base_ir->insert_before(assign(width_minus_bits, sub(immed_32, bits))); + + ir_variable *result = new(ir) ir_variable(ir->type, "bfe_result", + ir_var_temporary); + this->base_ir->insert_before(result); + + ir_if *bits_zero_if = if_tree(equal(bits, new(ir) ir_constant(0)), + assign(result, new(ir) ir_constant(0)), + assign(result, + rshift(lshift(value, swizzle_xxxx(sub(width_minus_bits, offset))), + swizzle_xxxx(width_minus_bits)))); + this->base_ir->insert_before(bits_zero_if); + + /* XXX: Seems like there should be a better way of doing this. */ + ir->operation = ir_binop_add; + ir->operands[0] = new(ir) ir_dereference_variable(result); + ir->operands[1] = new(ir) ir_constant(0); + ir->operands[2] = NULL; + + this->progress = true; +} + ir_visitor_status lower_instructions_visitor::visit_leave(ir_expression *ir) { @@ -442,6 +519,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) bitfield_insert_to_bfm_bitops(ir); break; + case ir_triop_bitfield_extract: + if (lowering(BFE_TO_BITOPS)) + bfe_to_bitops(ir); + break; + default: return visit_continue; } -- 1.7.8.6 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev