nir_lower_alu_to_scalar() and nir_lower_load_const_to_scalar() handle most cases quite well. They also create nir_ssa_defs, which are much less memory intensive than ir_variables.
This can mean losing out on a few GLSL IR optimizations, however. In most cases, this is fine. But a few cases still benefit: - add/mul/dot still benefit from opt_algebraic()'s constant reassociation capabilities. - min/max still benefit from opt_minmax(). - comparisons seem to still benefit from opt_algebraic(), even though we also do most of them in nir_opt_algebraic_late(). With this change, shader-db statistics on Skylake are: total instructions in shared programs: 9107924 -> 9107347 (-0.01%) instructions in affected programs: 188830 -> 188253 (-0.31%) helped: 572 HURT: 154 total cycles in shared programs: 69176332 -> 69129860 (-0.07%) cycles in affected programs: 23460680 -> 23414208 (-0.20%) helped: 8102 HURT: 7146 Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> --- .../dri/i965/brw_fs_channel_expressions.cpp | 221 +++------------------ 1 file changed, 30 insertions(+), 191 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 21f0b70..ed0f679 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -72,15 +72,21 @@ channel_expressions_predicate(ir_instruction *ir) return false; switch (expr->operation) { - /* these opcodes need to act on the whole vector, - * just like texturing. 
- */ - case ir_unop_interpolate_at_centroid: - case ir_binop_interpolate_at_offset: - case ir_binop_interpolate_at_sample: - return false; - default: + case ir_binop_mul: + case ir_binop_add: + case ir_binop_dot: + case ir_binop_min: + case ir_binop_max: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_nequal: break; + + default: + return false; } for (i = 0; i < expr->get_num_operands(); i++) { @@ -162,13 +168,21 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) return visit_continue; switch (expr->operation) { - case ir_unop_interpolate_at_centroid: - case ir_binop_interpolate_at_offset: - case ir_binop_interpolate_at_sample: - return visit_continue; + case ir_binop_mul: + case ir_binop_add: + case ir_binop_dot: + case ir_binop_min: + case ir_binop_max: + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_nequal: + break; default: - break; + return visit_continue; } /* Store the expression operands in temps so we can use them @@ -197,83 +211,13 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) /* OK, time to break down this vector operation. 
*/ switch (expr->operation) { - case ir_unop_bit_not: - case ir_unop_logic_not: - case ir_unop_neg: - case ir_unop_abs: - case ir_unop_sign: - case ir_unop_rcp: - case ir_unop_rsq: - case ir_unop_sqrt: - case ir_unop_exp: - case ir_unop_log: - case ir_unop_exp2: - case ir_unop_log2: - case ir_unop_bitcast_i2f: - case ir_unop_bitcast_f2i: - case ir_unop_bitcast_f2u: - case ir_unop_bitcast_u2f: - case ir_unop_i2u: - case ir_unop_u2i: - case ir_unop_f2i: - case ir_unop_f2u: - case ir_unop_i2f: - case ir_unop_f2b: - case ir_unop_b2f: - case ir_unop_i2b: - case ir_unop_b2i: - case ir_unop_u2f: - case ir_unop_trunc: - case ir_unop_ceil: - case ir_unop_floor: - case ir_unop_fract: - case ir_unop_round_even: - case ir_unop_sin: - case ir_unop_cos: - case ir_unop_dFdx: - case ir_unop_dFdx_coarse: - case ir_unop_dFdx_fine: - case ir_unop_dFdy: - case ir_unop_dFdy_coarse: - case ir_unop_dFdy_fine: - case ir_unop_bitfield_reverse: - case ir_unop_bit_count: - case ir_unop_find_msb: - case ir_unop_find_lsb: - case ir_unop_saturate: - case ir_unop_subroutine_to_int: - for (i = 0; i < vector_elements; i++) { - ir_rvalue *op0 = get_element(op_var[0], i); - - assign(ir, i, new(mem_ctx) ir_expression(expr->operation, - element_type, - op0, - NULL)); - } - break; - case ir_binop_add: - case ir_binop_sub: case ir_binop_mul: - case ir_binop_imul_high: - case ir_binop_div: - case ir_binop_carry: - case ir_binop_borrow: - case ir_binop_mod: case ir_binop_min: case ir_binop_max: - case ir_binop_pow: - case ir_binop_lshift: - case ir_binop_rshift: - case ir_binop_bit_and: - case ir_binop_bit_xor: - case ir_binop_bit_or: - case ir_binop_logic_and: - case ir_binop_logic_xor: - case ir_binop_logic_or: case ir_binop_less: - case ir_binop_greater: case ir_binop_lequal: + case ir_binop_greater: case ir_binop_gequal: case ir_binop_equal: case ir_binop_nequal: @@ -312,113 +256,8 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) break; } - case ir_binop_all_equal: - case 
ir_binop_any_nequal: { - ir_expression *last = NULL; - for (i = 0; i < vector_elements; i++) { - ir_rvalue *op0 = get_element(op_var[0], i); - ir_rvalue *op1 = get_element(op_var[1], i); - ir_expression *temp; - ir_expression_operation join; - - if (expr->operation == ir_binop_all_equal) - join = ir_binop_logic_and; - else - join = ir_binop_logic_or; - - temp = new(mem_ctx) ir_expression(expr->operation, - element_type, - op0, - op1); - if (last) { - last = new(mem_ctx) ir_expression(join, - element_type, - temp, - last); - } else { - last = temp; - } - } - assign(ir, 0, last); - break; - } - case ir_unop_noise: - unreachable("noise should have been broken down to function call"); - - case ir_binop_ubo_load: - case ir_unop_get_buffer_size: - unreachable("not yet supported"); - - case ir_triop_fma: - case ir_triop_lrp: - case ir_triop_csel: - case ir_triop_bitfield_extract: - for (i = 0; i < vector_elements; i++) { - ir_rvalue *op0 = get_element(op_var[0], i); - ir_rvalue *op1 = get_element(op_var[1], i); - ir_rvalue *op2 = get_element(op_var[2], i); - - assign(ir, i, new(mem_ctx) ir_expression(expr->operation, - element_type, - op0, - op1, - op2)); - } - break; - - case ir_quadop_bitfield_insert: - for (i = 0; i < vector_elements; i++) { - ir_rvalue *op0 = get_element(op_var[0], i); - ir_rvalue *op1 = get_element(op_var[1], i); - ir_rvalue *op2 = get_element(op_var[2], i); - ir_rvalue *op3 = get_element(op_var[3], i); - - assign(ir, i, new(mem_ctx) ir_expression(expr->operation, - element_type, - op0, - op1, - op2, - op3)); - } - break; - - case ir_unop_pack_snorm_2x16: - case ir_unop_pack_snorm_4x8: - case ir_unop_pack_unorm_2x16: - case ir_unop_pack_unorm_4x8: - case ir_unop_pack_half_2x16: - case ir_unop_unpack_snorm_2x16: - case ir_unop_unpack_snorm_4x8: - case ir_unop_unpack_unorm_2x16: - case ir_unop_unpack_unorm_4x8: - case ir_unop_unpack_half_2x16: - case ir_binop_ldexp: - case ir_binop_vector_extract: - case ir_triop_vector_insert: - case ir_quadop_vector: 
- case ir_unop_ssbo_unsized_array_length: - unreachable("should have been lowered"); - - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: - case ir_binop_pack_half_2x16_split: - case ir_unop_interpolate_at_centroid: - case ir_binop_interpolate_at_offset: - case ir_binop_interpolate_at_sample: - unreachable("not reached: expression operates on scalars only"); - - case ir_unop_pack_double_2x32: - case ir_unop_unpack_double_2x32: - case ir_unop_frexp_sig: - case ir_unop_frexp_exp: - case ir_unop_d2f: - case ir_unop_f2d: - case ir_unop_d2i: - case ir_unop_i2d: - case ir_unop_d2u: - case ir_unop_u2d: - case ir_unop_d2b: - unreachable("no fp64 support yet"); + default: + unreachable("should have been skipped above"); } ir->remove(); -- 2.7.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev