v2:
- Refactor conditions and shared function (Connor)
- Move code to nir_eval_const_opcode() (Connor)
- Don't flush to zero on fquantize2f16

From Vulkan spec, VK_KHR_shader_float_controls section:
"3) Do denorm and rounding mode controls apply to OpSpecConstantOp? RESOLVED: Yes, except when the opcode is OpQuantizeToF16." Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> --- src/compiler/nir/nir_constant_expressions.h | 3 +- src/compiler/nir/nir_constant_expressions.py | 65 ++++++++++++++++++-- src/compiler/nir/nir_loop_analyze.c | 7 ++- src/compiler/nir/nir_opt_constant_folding.c | 15 ++--- src/compiler/spirv/spirv_to_nir.c | 3 +- 5 files changed, 75 insertions(+), 18 deletions(-) diff --git a/src/compiler/nir/nir_constant_expressions.h b/src/compiler/nir/nir_constant_expressions.h index 1d6bbbc25d3..a2d416abc45 100644 --- a/src/compiler/nir/nir_constant_expressions.h +++ b/src/compiler/nir/nir_constant_expressions.h @@ -31,6 +31,7 @@ #include "nir.h" nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components, - unsigned bit_size, nir_const_value *src); + unsigned bit_size, nir_const_value *src, + unsigned float_controls_execution_mode); #endif /* NIR_CONSTANT_EXPRESSIONS_H */ diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py index 505cdd8baae..dc2132df0d0 100644 --- a/src/compiler/nir/nir_constant_expressions.py +++ b/src/compiler/nir/nir_constant_expressions.py @@ -66,6 +66,37 @@ template = """\ #include "util/bigmath.h" #include "nir_constant_expressions.h" +/** + * Checks if the provided value is a denorm and flushes it to zero. +*/ +static nir_const_value +constant_denorm_flush_to_zero(nir_const_value value, unsigned index, unsigned bit_size) +{ + switch(bit_size) { + case 64: + if (value.u64[index] < 0x0010000000000000) + value.u64[index] = 0; + if (value.u64[index] & 0x8000000000000000 && + !(value.u64[index] & 0x7ff0000000000000)) + value.u64[index] = 0x8000000000000000; + break; + case 32: + if (value.u32[index] < 0x00800000) + value.u32[index] = 0; + if (value.u32[index] & 0x80000000 && + !(value.u32[index] & 0x7f800000)) + value.u32[index] = 0x80000000; + break; + case 16: + if (value.u16[index] < 0x0400) + value.u16[index] = 0; + if (value.u16[index] & 0x8000 && + !(value.u16[index] & 0x7c00)) + value.u16[index] = 0x8000; + } + return value; +} + /** * Evaluate one component of packSnorm4x8. 
*/ @@ -260,7 +291,7 @@ struct ${type}${width}_vec { % endfor % endfor -<%def name="evaluate_op(op, bit_size)"> +<%def name="evaluate_op(op, bit_size, execution_mode)"> <% output_type = type_add_size(op.output_type, bit_size) input_types = [type_add_size(type_, bit_size) for type_ in op.input_types] @@ -277,6 +308,14 @@ struct ${type}${width}_vec { <% continue %> %endif + % for k in range(op.input_sizes[j]): + % if op.name != "fquantize2f16" and bit_size > 8 and op.input_types[j] == "float": + if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) { + _src[${j}] = constant_denorm_flush_to_zero(_src[${j}], ${k}, bit_size); + } + % endif + % endfor + const struct ${input_types[j]}_vec src${j} = { % for k in range(op.input_sizes[j]): % if input_types[j] == "int1": @@ -343,6 +382,12 @@ struct ${type}${width}_vec { % else: _dst_val.${get_const_field(output_type)}[_i] = dst; % endif + + % if op.name != "fquantize2f16" and bit_size > 8 and op.output_type == "float": + if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) { + _dst_val = constant_denorm_flush_to_zero(_dst_val, _i, bit_size); + } + % endif } % else: ## In the non-per-component case, create a struct dst with @@ -375,6 +420,12 @@ struct ${type}${width}_vec { % else: _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]}; % endif + + % if op.name != "fquantize2f16" and bit_size > 8 and op.output_type == "float": + if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) { + _dst_val = constant_denorm_flush_to_zero(_dst_val, ${k}, bit_size); + } + % endif % endfor % endif </%def> @@ -383,7 +434,8 @@ struct ${type}${width}_vec { static nir_const_value evaluate_${name}(MAYBE_UNUSED unsigned num_components, ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size, - MAYBE_UNUSED nir_const_value *_src) + MAYBE_UNUSED nir_const_value *_src, + MAYBE_UNUSED unsigned execution_mode) { nir_const_value _dst_val = { {0, } }; @@ -391,7 +443,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components, switch (bit_size) { % for bit_size in op_bit_sizes(op): case ${bit_size}: { - ${evaluate_op(op, bit_size)} + ${evaluate_op(op, bit_size, execution_mode)} break; } % endfor @@ -400,7 +452,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components, unreachable("unknown bit width"); } % else: - ${evaluate_op(op, 0)} + ${evaluate_op(op, 0, execution_mode)} % endif return _dst_val; @@ -409,12 +461,13 @@ evaluate_${name}(MAYBE_UNUSED unsigned num_components, nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components, - unsigned bit_width, nir_const_value *src) + unsigned bit_width, nir_const_value *src, + unsigned float_controls_execution_mode) { switch (op) { % for name in sorted(opcodes.keys()): case nir_op_${name}: - return evaluate_${name}(num_components, bit_width, src); + return evaluate_${name}(num_components, bit_width, src, float_controls_execution_mode); % endfor default: unreachable("shouldn't get here"); diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c index 259f02a854e..c9fba8649db 100644 --- a/src/compiler/nir/nir_loop_analyze.c +++ b/src/compiler/nir/nir_loop_analyze.c @@ -497,19 +497,20 @@ test_iterations(int32_t iter_int, nir_const_value *step, */ nir_const_value mul_src[2] = { iter_src, *step }; nir_const_value mul_result = - nir_eval_const_opcode(mul_op, 1, bit_size, mul_src); + nir_eval_const_opcode(mul_op, 1, bit_size, mul_src, SHADER_DEFAULT_FLOAT_CONTROL_MODE); /* Add the initial value to the accumulated induction variable total */ 
nir_const_value add_src[2] = { mul_result, *initial }; nir_const_value add_result = - nir_eval_const_opcode(add_op, 1, bit_size, add_src); + nir_eval_const_opcode(add_op, 1, bit_size, add_src, SHADER_DEFAULT_FLOAT_CONTROL_MODE); nir_const_value src[2] = { { {0, } }, { {0, } } }; src[limit_rhs ? 0 : 1] = add_result; src[limit_rhs ? 1 : 0] = *limit; /* Evaluate the loop exit condition */ - nir_const_value result = nir_eval_const_opcode(cond_op, 1, bit_size, src); + nir_const_value result = nir_eval_const_opcode(cond_op, 1, bit_size, src, + SHADER_DEFAULT_FLOAT_CONTROL_MODE); return invert_cond ? (result.u32[0] == 0) : (result.u32[0] != 0); } diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c index 5097a3bcc36..bd6130d5b33 100644 --- a/src/compiler/nir/nir_opt_constant_folding.c +++ b/src/compiler/nir/nir_opt_constant_folding.c @@ -39,7 +39,7 @@ struct constant_fold_state { }; static bool -constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx) +constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx, unsigned execution_mode) { nir_const_value src[NIR_MAX_VEC_COMPONENTS]; @@ -108,7 +108,7 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx) nir_const_value dest = nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components, - bit_size, src); + bit_size, src, execution_mode); nir_load_const_instr *new_instr = nir_load_const_instr_create(mem_ctx, @@ -161,14 +161,14 @@ constant_fold_intrinsic_instr(nir_intrinsic_instr *instr) } static bool -constant_fold_block(nir_block *block, void *mem_ctx) +constant_fold_block(nir_block *block, void *mem_ctx, unsigned execution_mode) { bool progress = false; nir_foreach_instr_safe(instr, block) { switch (instr->type) { case nir_instr_type_alu: - progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx); + progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx, execution_mode); break; case nir_instr_type_intrinsic: progress |= @@ -184,13 +184,13 @@ constant_fold_block(nir_block *block, void *mem_ctx) } static bool -nir_opt_constant_folding_impl(nir_function_impl *impl) +nir_opt_constant_folding_impl(nir_function_impl *impl, unsigned execution_mode) { void *mem_ctx = ralloc_parent(impl); bool progress = false; nir_foreach_block(block, impl) { - progress |= constant_fold_block(block, mem_ctx); + progress |= constant_fold_block(block, mem_ctx, execution_mode); } if (progress) @@ -204,10 +204,11 @@ bool nir_opt_constant_folding(nir_shader *shader) { bool progress = false; + unsigned execution_mode = shader->info.shader_float_controls_execution_mode; nir_foreach_function(function, shader) { if (function->impl) - progress |= nir_opt_constant_folding_impl(function->impl); + progress |= nir_opt_constant_folding_impl(function->impl, execution_mode); } return progress; diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 96d4d80970f..7578a83e2bb 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -1842,7 +1842,8 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, } val->constant->values[0] = - nir_eval_const_opcode(op, num_components, bit_size, src); + nir_eval_const_opcode(op, num_components, bit_size, src, + b->shader->info.shader_float_controls_execution_mode); break; } /* default */ } -- 2.19.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
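
For reference, the flush-to-zero rule that constant_denorm_flush_to_zero() applies is simply: if the
biased exponent bits of the value are all zero (i.e. the value is +/-0 or a denormal), collapse it to
a signed zero. The snippet below is a minimal standalone sketch of that rule for the 32-bit case only;
the helper name flush_denorm_fp32, the test value and the printf are illustrative and are not part of
the patch or of Mesa:

/* Standalone sketch of the fp32 flush-to-zero rule: a bit pattern whose
 * biased exponent is zero (+/-0 or a denormal) collapses to a signed zero. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t
flush_denorm_fp32(uint32_t bits)
{
   if ((bits & 0x7f800000u) == 0)   /* exponent bits all zero */
      bits &= 0x80000000u;          /* keep only the sign bit */
   return bits;
}

int
main(void)
{
   float f = -1e-40f;               /* below FLT_MIN, so a denormal */
   uint32_t bits;

   memcpy(&bits, &f, sizeof(bits));
   bits = flush_denorm_fp32(bits);
   memcpy(&f, &bits, sizeof(f));

   printf("%g\n", f);               /* prints -0 once flushed */
   return 0;
}

In the patch this flushing only happens when the shader's float controls execution mode has the
corresponding SHADER_DENORM_FLUSH_TO_ZERO_FP* bit set, and never for fquantize2f16, per the spec
issue quoted above.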